Diffstat (limited to 'net')
61 files changed, 6372 insertions, 2689 deletions
diff --git a/net/9p/Makefile b/net/9p/Makefile index d3abb246ccab..8a1051101898 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile | |||
@@ -4,7 +4,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o | |||
4 | 4 | ||
5 | 9pnet-objs := \ | 5 | 9pnet-objs := \ |
6 | mod.o \ | 6 | mod.o \ |
7 | mux.o \ | ||
8 | client.o \ | 7 | client.o \ |
9 | conv.o \ | 8 | conv.o \ |
10 | error.o \ | 9 | error.o \ |
diff --git a/net/9p/client.c b/net/9p/client.c index af9199364049..84e087e24146 100644 --- a/net/9p/client.c +++ b/net/9p/client.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * 9P Client | 4 | * 9P Client |
5 | * | 5 | * |
6 | * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> | ||
6 | * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> | 7 | * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> |
7 | * | 8 | * |
8 | * This program is free software; you can redistribute it and/or modify | 9 | * This program is free software; you can redistribute it and/or modify |
@@ -25,6 +26,7 @@ | |||
25 | #include <linux/module.h> | 26 | #include <linux/module.h> |
26 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
27 | #include <linux/fs.h> | 28 | #include <linux/fs.h> |
29 | #include <linux/poll.h> | ||
28 | #include <linux/idr.h> | 30 | #include <linux/idr.h> |
29 | #include <linux/mutex.h> | 31 | #include <linux/mutex.h> |
30 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
@@ -32,15 +34,97 @@ | |||
32 | #include <net/9p/9p.h> | 34 | #include <net/9p/9p.h> |
33 | #include <linux/parser.h> | 35 | #include <linux/parser.h> |
34 | #include <net/9p/transport.h> | 36 | #include <net/9p/transport.h> |
35 | #include <net/9p/conn.h> | ||
36 | #include <net/9p/client.h> | 37 | #include <net/9p/client.h> |
37 | 38 | ||
38 | static struct p9_fid *p9_fid_create(struct p9_client *clnt); | 39 | static struct p9_fid *p9_fid_create(struct p9_client *clnt); |
39 | static void p9_fid_destroy(struct p9_fid *fid); | 40 | static void p9_fid_destroy(struct p9_fid *fid); |
40 | static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu); | 41 | static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu); |
41 | 42 | ||
42 | struct p9_client *p9_client_create(struct p9_trans *trans, int msize, | 43 | /* |
43 | int dotu) | 44 | * Client Option Parsing (code inspired by NFS code) |
45 | * - a little lazy - parse all client options | ||
46 | */ | ||
47 | |||
48 | enum { | ||
49 | Opt_msize, | ||
50 | Opt_trans, | ||
51 | Opt_legacy, | ||
52 | Opt_err, | ||
53 | }; | ||
54 | |||
55 | static match_table_t tokens = { | ||
56 | {Opt_msize, "msize=%u"}, | ||
57 | {Opt_legacy, "noextend"}, | ||
58 | {Opt_trans, "trans=%s"}, | ||
59 | {Opt_err, NULL}, | ||
60 | }; | ||
61 | |||
62 | /** | ||
63 | * v9fs_parse_options - parse mount options into session structure | ||
64 | * @options: options string passed from mount | ||
65 | * @v9ses: existing v9fs session information | ||
66 | * | ||
67 | */ | ||
68 | |||
69 | static void parse_opts(char *options, struct p9_client *clnt) | ||
70 | { | ||
71 | char *p; | ||
72 | substring_t args[MAX_OPT_ARGS]; | ||
73 | int option; | ||
74 | int ret; | ||
75 | |||
76 | clnt->trans_mod = v9fs_default_trans(); | ||
77 | clnt->dotu = 1; | ||
78 | clnt->msize = 8192; | ||
79 | |||
80 | if (!options) | ||
81 | return; | ||
82 | |||
83 | while ((p = strsep(&options, ",")) != NULL) { | ||
84 | int token; | ||
85 | if (!*p) | ||
86 | continue; | ||
87 | token = match_token(p, tokens, args); | ||
88 | if (token < Opt_trans) { | ||
89 | ret = match_int(&args[0], &option); | ||
90 | if (ret < 0) { | ||
91 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
92 | "integer field, but no integer?\n"); | ||
93 | continue; | ||
94 | } | ||
95 | } | ||
96 | switch (token) { | ||
97 | case Opt_msize: | ||
98 | clnt->msize = option; | ||
99 | break; | ||
100 | case Opt_trans: | ||
101 | clnt->trans_mod = v9fs_match_trans(&args[0]); | ||
102 | break; | ||
103 | case Opt_legacy: | ||
104 | clnt->dotu = 0; | ||
105 | break; | ||
106 | default: | ||
107 | continue; | ||
108 | } | ||
109 | } | ||
110 | } | ||
111 | |||
112 | |||
113 | /** | ||
114 | * p9_client_rpc - sends 9P request and waits until a response is available. | ||
115 | * The function can be interrupted. | ||
116 | * @c: client data | ||
117 | * @tc: request to be sent | ||
118 | * @rc: pointer where a pointer to the response is stored | ||
119 | */ | ||
120 | int | ||
121 | p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, | ||
122 | struct p9_fcall **rc) | ||
123 | { | ||
124 | return c->trans->rpc(c->trans, tc, rc); | ||
125 | } | ||
126 | |||
127 | struct p9_client *p9_client_create(const char *dev_name, char *options) | ||
44 | { | 128 | { |
45 | int err, n; | 129 | int err, n; |
46 | struct p9_client *clnt; | 130 | struct p9_client *clnt; |
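The hunk above moves mount-option handling into the client: parse_opts() seeds defaults (the default transport module, dotu = 1, msize = 8192) and then walks the comma-separated option string, recognizing msize=%u, noextend and trans=%s, while the new p9_client_rpc() simply forwards to the transport's rpc operation. Below is a minimal userspace sketch of the same strsep()-based walk, offered only as an illustration: it uses plain strncmp()/strtoul() in place of the kernel's match_token()/match_int() helpers, and the demo_* names and the fixed-size trans buffer are assumptions of the sketch, not kernel identifiers.

/* Illustrative userspace analogue of parse_opts(): walk a comma-separated
 * option string and fill in a small client structure with defaults first. */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_client {
	unsigned int msize;   /* maximum message size */
	int dotu;             /* 9P2000.u extensions enabled */
	char trans[16];       /* requested transport name */
};

static void demo_parse_opts(char *options, struct demo_client *clnt)
{
	char *p;

	/* defaults, mirroring parse_opts() in the hunk above */
	clnt->msize = 8192;
	clnt->dotu = 1;
	strcpy(clnt->trans, "default");

	if (!options)
		return;

	while ((p = strsep(&options, ",")) != NULL) {
		if (!*p)
			continue;               /* skip empty fields */
		if (strncmp(p, "msize=", 6) == 0)
			clnt->msize = (unsigned int)strtoul(p + 6, NULL, 10);
		else if (strcmp(p, "noextend") == 0)
			clnt->dotu = 0;         /* fall back to legacy 9P2000 */
		else if (strncmp(p, "trans=", 6) == 0)
			snprintf(clnt->trans, sizeof(clnt->trans), "%s", p + 6);
		/* unknown options are silently ignored, as in the kernel code */
	}
}

int main(void)
{
	char opts[] = "msize=16384,noextend,trans=tcp";
	struct demo_client c;

	demo_parse_opts(opts, &c);
	printf("msize=%u dotu=%d trans=%s\n", c.msize, c.dotu, c.trans);
	return 0;
}

A string such as "msize=16384,noextend,trans=tcp" therefore ends up selecting the transport and clamping the message size before the Tversion handshake shown later in p9_client_create().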
@@ -54,12 +138,7 @@ struct p9_client *p9_client_create(struct p9_trans *trans, int msize, | |||
54 | if (!clnt) | 138 | if (!clnt) |
55 | return ERR_PTR(-ENOMEM); | 139 | return ERR_PTR(-ENOMEM); |
56 | 140 | ||
57 | P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n", | ||
58 | clnt, trans, msize, dotu); | ||
59 | spin_lock_init(&clnt->lock); | 141 | spin_lock_init(&clnt->lock); |
60 | clnt->trans = trans; | ||
61 | clnt->msize = msize; | ||
62 | clnt->dotu = dotu; | ||
63 | INIT_LIST_HEAD(&clnt->fidlist); | 142 | INIT_LIST_HEAD(&clnt->fidlist); |
64 | clnt->fidpool = p9_idpool_create(); | 143 | clnt->fidpool = p9_idpool_create(); |
65 | if (!clnt->fidpool) { | 144 | if (!clnt->fidpool) { |
@@ -68,13 +147,29 @@ struct p9_client *p9_client_create(struct p9_trans *trans, int msize, | |||
68 | goto error; | 147 | goto error; |
69 | } | 148 | } |
70 | 149 | ||
71 | clnt->conn = p9_conn_create(clnt->trans, clnt->msize, &clnt->dotu); | 150 | parse_opts(options, clnt); |
72 | if (IS_ERR(clnt->conn)) { | 151 | if (clnt->trans_mod == NULL) { |
73 | err = PTR_ERR(clnt->conn); | 152 | err = -EPROTONOSUPPORT; |
74 | clnt->conn = NULL; | 153 | P9_DPRINTK(P9_DEBUG_ERROR, |
154 | "No transport defined or default transport\n"); | ||
75 | goto error; | 155 | goto error; |
76 | } | 156 | } |
77 | 157 | ||
158 | P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n", | ||
159 | clnt, clnt->trans_mod, clnt->msize, clnt->dotu); | ||
160 | |||
161 | |||
162 | clnt->trans = clnt->trans_mod->create(dev_name, options, clnt->msize, | ||
163 | clnt->dotu); | ||
164 | if (IS_ERR(clnt->trans)) { | ||
165 | err = PTR_ERR(clnt->trans); | ||
166 | clnt->trans = NULL; | ||
167 | goto error; | ||
168 | } | ||
169 | |||
170 | if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) | ||
171 | clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; | ||
172 | |||
78 | tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000"); | 173 | tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000"); |
79 | if (IS_ERR(tc)) { | 174 | if (IS_ERR(tc)) { |
80 | err = PTR_ERR(tc); | 175 | err = PTR_ERR(tc); |
@@ -82,7 +177,7 @@ struct p9_client *p9_client_create(struct p9_trans *trans, int msize, | |||
82 | goto error; | 177 | goto error; |
83 | } | 178 | } |
84 | 179 | ||
85 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 180 | err = p9_client_rpc(clnt, tc, &rc); |
86 | if (err) | 181 | if (err) |
87 | goto error; | 182 | goto error; |
88 | 183 | ||
@@ -117,10 +212,6 @@ void p9_client_destroy(struct p9_client *clnt) | |||
117 | struct p9_fid *fid, *fidptr; | 212 | struct p9_fid *fid, *fidptr; |
118 | 213 | ||
119 | P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); | 214 | P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); |
120 | if (clnt->conn) { | ||
121 | p9_conn_destroy(clnt->conn); | ||
122 | clnt->conn = NULL; | ||
123 | } | ||
124 | 215 | ||
125 | if (clnt->trans) { | 216 | if (clnt->trans) { |
126 | clnt->trans->close(clnt->trans); | 217 | clnt->trans->close(clnt->trans); |
@@ -142,7 +233,6 @@ void p9_client_disconnect(struct p9_client *clnt) | |||
142 | { | 233 | { |
143 | P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); | 234 | P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); |
144 | clnt->trans->status = Disconnected; | 235 | clnt->trans->status = Disconnected; |
145 | p9_conn_cancel(clnt->conn, -EIO); | ||
146 | } | 236 | } |
147 | EXPORT_SYMBOL(p9_client_disconnect); | 237 | EXPORT_SYMBOL(p9_client_disconnect); |
148 | 238 | ||
@@ -174,7 +264,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, | |||
174 | goto error; | 264 | goto error; |
175 | } | 265 | } |
176 | 266 | ||
177 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 267 | err = p9_client_rpc(clnt, tc, &rc); |
178 | if (err) | 268 | if (err) |
179 | goto error; | 269 | goto error; |
180 | 270 | ||
@@ -219,7 +309,7 @@ struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, | |||
219 | goto error; | 309 | goto error; |
220 | } | 310 | } |
221 | 311 | ||
222 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 312 | err = p9_client_rpc(clnt, tc, &rc); |
223 | if (err) | 313 | if (err) |
224 | goto error; | 314 | goto error; |
225 | 315 | ||
@@ -270,7 +360,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, | |||
270 | goto error; | 360 | goto error; |
271 | } | 361 | } |
272 | 362 | ||
273 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 363 | err = p9_client_rpc(clnt, tc, &rc); |
274 | if (err) { | 364 | if (err) { |
275 | if (rc && rc->id == P9_RWALK) | 365 | if (rc && rc->id == P9_RWALK) |
276 | goto clunk_fid; | 366 | goto clunk_fid; |
@@ -305,7 +395,7 @@ clunk_fid: | |||
305 | goto error; | 395 | goto error; |
306 | } | 396 | } |
307 | 397 | ||
308 | p9_conn_rpc(clnt->conn, tc, &rc); | 398 | p9_client_rpc(clnt, tc, &rc); |
309 | 399 | ||
310 | error: | 400 | error: |
311 | kfree(tc); | 401 | kfree(tc); |
@@ -339,7 +429,7 @@ int p9_client_open(struct p9_fid *fid, int mode) | |||
339 | goto done; | 429 | goto done; |
340 | } | 430 | } |
341 | 431 | ||
342 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 432 | err = p9_client_rpc(clnt, tc, &rc); |
343 | if (err) | 433 | if (err) |
344 | goto done; | 434 | goto done; |
345 | 435 | ||
@@ -378,7 +468,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, | |||
378 | goto done; | 468 | goto done; |
379 | } | 469 | } |
380 | 470 | ||
381 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 471 | err = p9_client_rpc(clnt, tc, &rc); |
382 | if (err) | 472 | if (err) |
383 | goto done; | 473 | goto done; |
384 | 474 | ||
@@ -411,7 +501,7 @@ int p9_client_clunk(struct p9_fid *fid) | |||
411 | goto done; | 501 | goto done; |
412 | } | 502 | } |
413 | 503 | ||
414 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 504 | err = p9_client_rpc(clnt, tc, &rc); |
415 | if (err) | 505 | if (err) |
416 | goto done; | 506 | goto done; |
417 | 507 | ||
@@ -443,7 +533,7 @@ int p9_client_remove(struct p9_fid *fid) | |||
443 | goto done; | 533 | goto done; |
444 | } | 534 | } |
445 | 535 | ||
446 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 536 | err = p9_client_rpc(clnt, tc, &rc); |
447 | if (err) | 537 | if (err) |
448 | goto done; | 538 | goto done; |
449 | 539 | ||
@@ -485,7 +575,7 @@ int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count) | |||
485 | goto error; | 575 | goto error; |
486 | } | 576 | } |
487 | 577 | ||
488 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 578 | err = p9_client_rpc(clnt, tc, &rc); |
489 | if (err) | 579 | if (err) |
490 | goto error; | 580 | goto error; |
491 | 581 | ||
@@ -542,7 +632,7 @@ int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count) | |||
542 | goto error; | 632 | goto error; |
543 | } | 633 | } |
544 | 634 | ||
545 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 635 | err = p9_client_rpc(clnt, tc, &rc); |
546 | if (err) | 636 | if (err) |
547 | goto error; | 637 | goto error; |
548 | 638 | ||
@@ -596,7 +686,7 @@ p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count) | |||
596 | goto error; | 686 | goto error; |
597 | } | 687 | } |
598 | 688 | ||
599 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 689 | err = p9_client_rpc(clnt, tc, &rc); |
600 | if (err) | 690 | if (err) |
601 | goto error; | 691 | goto error; |
602 | 692 | ||
@@ -660,7 +750,7 @@ p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset, | |||
660 | goto error; | 750 | goto error; |
661 | } | 751 | } |
662 | 752 | ||
663 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 753 | err = p9_client_rpc(clnt, tc, &rc); |
664 | if (err) | 754 | if (err) |
665 | goto error; | 755 | goto error; |
666 | 756 | ||
@@ -731,7 +821,7 @@ struct p9_stat *p9_client_stat(struct p9_fid *fid) | |||
731 | goto error; | 821 | goto error; |
732 | } | 822 | } |
733 | 823 | ||
734 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 824 | err = p9_client_rpc(clnt, tc, &rc); |
735 | if (err) | 825 | if (err) |
736 | goto error; | 826 | goto error; |
737 | 827 | ||
@@ -773,7 +863,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) | |||
773 | goto done; | 863 | goto done; |
774 | } | 864 | } |
775 | 865 | ||
776 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 866 | err = p9_client_rpc(clnt, tc, &rc); |
777 | 867 | ||
778 | done: | 868 | done: |
779 | kfree(tc); | 869 | kfree(tc); |
@@ -830,7 +920,7 @@ struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset) | |||
830 | goto error; | 920 | goto error; |
831 | } | 921 | } |
832 | 922 | ||
833 | err = p9_conn_rpc(clnt->conn, tc, &rc); | 923 | err = p9_client_rpc(clnt, tc, &rc); |
834 | if (err) | 924 | if (err) |
835 | goto error; | 925 | goto error; |
836 | 926 | ||
@@ -901,16 +991,21 @@ static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu) | |||
901 | memmove(ret, st, sizeof(struct p9_stat)); | 991 | memmove(ret, st, sizeof(struct p9_stat)); |
902 | p = ((char *) ret) + sizeof(struct p9_stat); | 992 | p = ((char *) ret) + sizeof(struct p9_stat); |
903 | memmove(p, st->name.str, st->name.len); | 993 | memmove(p, st->name.str, st->name.len); |
994 | ret->name.str = p; | ||
904 | p += st->name.len; | 995 | p += st->name.len; |
905 | memmove(p, st->uid.str, st->uid.len); | 996 | memmove(p, st->uid.str, st->uid.len); |
997 | ret->uid.str = p; | ||
906 | p += st->uid.len; | 998 | p += st->uid.len; |
907 | memmove(p, st->gid.str, st->gid.len); | 999 | memmove(p, st->gid.str, st->gid.len); |
1000 | ret->gid.str = p; | ||
908 | p += st->gid.len; | 1001 | p += st->gid.len; |
909 | memmove(p, st->muid.str, st->muid.len); | 1002 | memmove(p, st->muid.str, st->muid.len); |
1003 | ret->muid.str = p; | ||
910 | p += st->muid.len; | 1004 | p += st->muid.len; |
911 | 1005 | ||
912 | if (dotu) { | 1006 | if (dotu) { |
913 | memmove(p, st->extension.str, st->extension.len); | 1007 | memmove(p, st->extension.str, st->extension.len); |
1008 | ret->extension.str = p; | ||
914 | p += st->extension.len; | 1009 | p += st->extension.len; |
915 | } | 1010 | } |
916 | 1011 | ||
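The p9_clone_stat() hunk above fixes a genuine bug: the stat structure and its strings are packed into a single allocation, but before this change the name/uid/gid/muid/extension pointers in the copy still referred to the original buffer. The following is a compact, self-contained sketch of that clone-with-pointer-fixup pattern under assumed demo_* names; the two-field struct is purely illustrative and stands in for the larger p9_stat.

/* Sketch of the pattern used by p9_clone_stat(): copy a struct plus its
 * strings into one allocation and re-aim the copied struct's string
 * pointers at the packed copies that follow it. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_str {
	size_t len;
	char *str;              /* 9P strings are counted, not NUL-terminated */
};

struct demo_stat {
	struct demo_str name;
	struct demo_str uid;
};

static struct demo_stat *demo_clone_stat(const struct demo_stat *st)
{
	size_t n = sizeof(*st) + st->name.len + st->uid.len;
	struct demo_stat *ret = malloc(n);
	char *p;

	if (!ret)
		return NULL;

	memcpy(ret, st, sizeof(*st));
	p = (char *)ret + sizeof(*st);

	memcpy(p, st->name.str, st->name.len);
	ret->name.str = p;      /* the fix: point at the copy, not the source */
	p += st->name.len;

	memcpy(p, st->uid.str, st->uid.len);
	ret->uid.str = p;
	return ret;
}

int main(void)
{
	char name[] = "README", uid[] = "ericvh";
	struct demo_stat st = {
		.name = { sizeof(name) - 1, name },
		.uid  = { sizeof(uid) - 1, uid },
	};
	struct demo_stat *c = demo_clone_stat(&st);

	printf("%.*s owned by %.*s\n",
	       (int)c->name.len, c->name.str, (int)c->uid.len, c->uid.str);
	free(c);
	return 0;
}

Without the pointer assignments, freeing or reusing the source buffer would leave the clone with dangling string pointers, which is exactly what the added ret->*.str = p lines prevent.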
diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c index b1ae8ec57d54..40244fbd9b0d 100644 --- a/net/9p/fcprint.c +++ b/net/9p/fcprint.c | |||
@@ -347,12 +347,12 @@ p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) | |||
347 | 347 | ||
348 | return ret; | 348 | return ret; |
349 | } | 349 | } |
350 | |||
351 | #else | 350 | #else |
352 | int | 351 | int |
353 | p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) | 352 | p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) |
354 | { | 353 | { |
355 | return 0; | 354 | return 0; |
356 | } | 355 | } |
357 | EXPORT_SYMBOL(p9_printfcall); | ||
358 | #endif /* CONFIG_NET_9P_DEBUG */ | 356 | #endif /* CONFIG_NET_9P_DEBUG */ |
357 | EXPORT_SYMBOL(p9_printfcall); | ||
358 | |||
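The fcprint.c hunk moves EXPORT_SYMBOL(p9_printfcall) from inside the #else branch to after the #endif, so the single export statement covers whichever definition of the function was built. A small standalone sketch of that shape follows; EXPORT_SYMBOL is kernel-only, so an empty stand-in macro and the demo function name are assumptions made so the fragment compiles on its own.

/* One export, two conditional definitions: build with -DNET_9P_DEBUG for
 * the full printer, without it for the stub. Either way, the single
 * EXPORT_SYMBOL() after #endif applies to the definition that was built. */
#include <stdio.h>

#define EXPORT_SYMBOL(sym)      /* no-op stand-in for the kernel macro */

#ifdef NET_9P_DEBUG
int demo_printfcall(char *buf, int buflen, int id, int tag)
{
	return snprintf(buf, buflen, "fcall id %d tag %d", id, tag);
}
#else
int demo_printfcall(char *buf, int buflen, int id, int tag)
{
	return 0;               /* debug printing compiled out */
}
#endif
EXPORT_SYMBOL(demo_printfcall); /* exported once, after the conditional */

int main(void)
{
	char buf[64];
	int n = demo_printfcall(buf, sizeof(buf), 100 /* Tversion */, 0);

	printf("%d: %s\n", n, n ? buf : "(stub)");
	return 0;
}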
diff --git a/net/9p/mod.c b/net/9p/mod.c index 8f9763a9dc12..c285aab2af04 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c | |||
@@ -106,15 +106,10 @@ EXPORT_SYMBOL(v9fs_default_trans); | |||
106 | */ | 106 | */ |
107 | static int __init init_p9(void) | 107 | static int __init init_p9(void) |
108 | { | 108 | { |
109 | int ret; | 109 | int ret = 0; |
110 | 110 | ||
111 | p9_error_init(); | 111 | p9_error_init(); |
112 | printk(KERN_INFO "Installing 9P2000 support\n"); | 112 | printk(KERN_INFO "Installing 9P2000 support\n"); |
113 | ret = p9_mux_global_init(); | ||
114 | if (ret) { | ||
115 | printk(KERN_WARNING "9p: starting mux failed\n"); | ||
116 | return ret; | ||
117 | } | ||
118 | 113 | ||
119 | return ret; | 114 | return ret; |
120 | } | 115 | } |
@@ -126,7 +121,7 @@ static int __init init_p9(void) | |||
126 | 121 | ||
127 | static void __exit exit_p9(void) | 122 | static void __exit exit_p9(void) |
128 | { | 123 | { |
129 | p9_mux_global_exit(); | 124 | printk(KERN_INFO "Unloading 9P2000 support\n"); |
130 | } | 125 | } |
131 | 126 | ||
132 | module_init(init_p9) | 127 | module_init(init_p9) |
diff --git a/net/9p/mux.c b/net/9p/mux.c deleted file mode 100644 index c9f0805048e4..000000000000 --- a/net/9p/mux.c +++ /dev/null | |||
@@ -1,1060 +0,0 @@ | |||
1 | /* | ||
2 | * net/9p/mux.c | ||
3 | * | ||
4 | * Protocol Multiplexer | ||
5 | * | ||
6 | * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> | ||
7 | * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License version 2 | ||
11 | * as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to: | ||
20 | * Free Software Foundation | ||
21 | * 51 Franklin Street, Fifth Floor | ||
22 | * Boston, MA 02111-1301 USA | ||
23 | * | ||
24 | */ | ||
25 | |||
26 | #include <linux/module.h> | ||
27 | #include <linux/errno.h> | ||
28 | #include <linux/fs.h> | ||
29 | #include <linux/poll.h> | ||
30 | #include <linux/kthread.h> | ||
31 | #include <linux/idr.h> | ||
32 | #include <linux/mutex.h> | ||
33 | #include <net/9p/9p.h> | ||
34 | #include <linux/parser.h> | ||
35 | #include <net/9p/transport.h> | ||
36 | #include <net/9p/conn.h> | ||
37 | |||
38 | #define ERREQFLUSH 1 | ||
39 | #define SCHED_TIMEOUT 10 | ||
40 | #define MAXPOLLWADDR 2 | ||
41 | |||
42 | enum { | ||
43 | Rworksched = 1, /* read work scheduled or running */ | ||
44 | Rpending = 2, /* can read */ | ||
45 | Wworksched = 4, /* write work scheduled or running */ | ||
46 | Wpending = 8, /* can write */ | ||
47 | }; | ||
48 | |||
49 | enum { | ||
50 | None, | ||
51 | Flushing, | ||
52 | Flushed, | ||
53 | }; | ||
54 | |||
55 | struct p9_mux_poll_task; | ||
56 | |||
57 | struct p9_req { | ||
58 | spinlock_t lock; /* protect request structure */ | ||
59 | int tag; | ||
60 | struct p9_fcall *tcall; | ||
61 | struct p9_fcall *rcall; | ||
62 | int err; | ||
63 | p9_conn_req_callback cb; | ||
64 | void *cba; | ||
65 | int flush; | ||
66 | struct list_head req_list; | ||
67 | }; | ||
68 | |||
69 | struct p9_conn { | ||
70 | spinlock_t lock; /* protect lock structure */ | ||
71 | struct list_head mux_list; | ||
72 | struct p9_mux_poll_task *poll_task; | ||
73 | int msize; | ||
74 | unsigned char *extended; | ||
75 | struct p9_trans *trans; | ||
76 | struct p9_idpool *tagpool; | ||
77 | int err; | ||
78 | wait_queue_head_t equeue; | ||
79 | struct list_head req_list; | ||
80 | struct list_head unsent_req_list; | ||
81 | struct p9_fcall *rcall; | ||
82 | int rpos; | ||
83 | char *rbuf; | ||
84 | int wpos; | ||
85 | int wsize; | ||
86 | char *wbuf; | ||
87 | wait_queue_t poll_wait[MAXPOLLWADDR]; | ||
88 | wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; | ||
89 | poll_table pt; | ||
90 | struct work_struct rq; | ||
91 | struct work_struct wq; | ||
92 | unsigned long wsched; | ||
93 | }; | ||
94 | |||
95 | struct p9_mux_poll_task { | ||
96 | struct task_struct *task; | ||
97 | struct list_head mux_list; | ||
98 | int muxnum; | ||
99 | }; | ||
100 | |||
101 | struct p9_mux_rpc { | ||
102 | struct p9_conn *m; | ||
103 | int err; | ||
104 | struct p9_fcall *tcall; | ||
105 | struct p9_fcall *rcall; | ||
106 | wait_queue_head_t wqueue; | ||
107 | }; | ||
108 | |||
109 | static int p9_poll_proc(void *); | ||
110 | static void p9_read_work(struct work_struct *work); | ||
111 | static void p9_write_work(struct work_struct *work); | ||
112 | static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, | ||
113 | poll_table * p); | ||
114 | static u16 p9_mux_get_tag(struct p9_conn *); | ||
115 | static void p9_mux_put_tag(struct p9_conn *, u16); | ||
116 | |||
117 | static DEFINE_MUTEX(p9_mux_task_lock); | ||
118 | static struct workqueue_struct *p9_mux_wq; | ||
119 | |||
120 | static int p9_mux_num; | ||
121 | static int p9_mux_poll_task_num; | ||
122 | static struct p9_mux_poll_task p9_mux_poll_tasks[100]; | ||
123 | |||
124 | int p9_mux_global_init(void) | ||
125 | { | ||
126 | int i; | ||
127 | |||
128 | for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) | ||
129 | p9_mux_poll_tasks[i].task = NULL; | ||
130 | |||
131 | p9_mux_wq = create_workqueue("v9fs"); | ||
132 | if (!p9_mux_wq) { | ||
133 | printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); | ||
134 | return -ENOMEM; | ||
135 | } | ||
136 | |||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | void p9_mux_global_exit(void) | ||
141 | { | ||
142 | destroy_workqueue(p9_mux_wq); | ||
143 | } | ||
144 | |||
145 | /** | ||
146 | * p9_mux_calc_poll_procs - calculates the number of polling procs | ||
147 | * based on the number of mounted v9fs filesystems. | ||
148 | * | ||
149 | * The current implementation returns sqrt of the number of mounts. | ||
150 | */ | ||
151 | static int p9_mux_calc_poll_procs(int muxnum) | ||
152 | { | ||
153 | int n; | ||
154 | |||
155 | if (p9_mux_poll_task_num) | ||
156 | n = muxnum / p9_mux_poll_task_num + | ||
157 | (muxnum % p9_mux_poll_task_num ? 1 : 0); | ||
158 | else | ||
159 | n = 1; | ||
160 | |||
161 | if (n > ARRAY_SIZE(p9_mux_poll_tasks)) | ||
162 | n = ARRAY_SIZE(p9_mux_poll_tasks); | ||
163 | |||
164 | return n; | ||
165 | } | ||
166 | |||
167 | static int p9_mux_poll_start(struct p9_conn *m) | ||
168 | { | ||
169 | int i, n; | ||
170 | struct p9_mux_poll_task *vpt, *vptlast; | ||
171 | struct task_struct *pproc; | ||
172 | |||
173 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num, | ||
174 | p9_mux_poll_task_num); | ||
175 | mutex_lock(&p9_mux_task_lock); | ||
176 | |||
177 | n = p9_mux_calc_poll_procs(p9_mux_num + 1); | ||
178 | if (n > p9_mux_poll_task_num) { | ||
179 | for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { | ||
180 | if (p9_mux_poll_tasks[i].task == NULL) { | ||
181 | vpt = &p9_mux_poll_tasks[i]; | ||
182 | P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n", | ||
183 | vpt); | ||
184 | pproc = kthread_create(p9_poll_proc, vpt, | ||
185 | "v9fs-poll"); | ||
186 | |||
187 | if (!IS_ERR(pproc)) { | ||
188 | vpt->task = pproc; | ||
189 | INIT_LIST_HEAD(&vpt->mux_list); | ||
190 | vpt->muxnum = 0; | ||
191 | p9_mux_poll_task_num++; | ||
192 | wake_up_process(vpt->task); | ||
193 | } | ||
194 | break; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) | ||
199 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
200 | "warning: no free poll slots\n"); | ||
201 | } | ||
202 | |||
203 | n = (p9_mux_num + 1) / p9_mux_poll_task_num + | ||
204 | ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0); | ||
205 | |||
206 | vptlast = NULL; | ||
207 | for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { | ||
208 | vpt = &p9_mux_poll_tasks[i]; | ||
209 | if (vpt->task != NULL) { | ||
210 | vptlast = vpt; | ||
211 | if (vpt->muxnum < n) { | ||
212 | P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); | ||
213 | list_add(&m->mux_list, &vpt->mux_list); | ||
214 | vpt->muxnum++; | ||
215 | m->poll_task = vpt; | ||
216 | memset(&m->poll_waddr, 0, | ||
217 | sizeof(m->poll_waddr)); | ||
218 | init_poll_funcptr(&m->pt, p9_pollwait); | ||
219 | break; | ||
220 | } | ||
221 | } | ||
222 | } | ||
223 | |||
224 | if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) { | ||
225 | if (vptlast == NULL) { | ||
226 | mutex_unlock(&p9_mux_task_lock); | ||
227 | return -ENOMEM; | ||
228 | } | ||
229 | |||
230 | P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); | ||
231 | list_add(&m->mux_list, &vptlast->mux_list); | ||
232 | vptlast->muxnum++; | ||
233 | m->poll_task = vptlast; | ||
234 | memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); | ||
235 | init_poll_funcptr(&m->pt, p9_pollwait); | ||
236 | } | ||
237 | |||
238 | p9_mux_num++; | ||
239 | mutex_unlock(&p9_mux_task_lock); | ||
240 | |||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | static void p9_mux_poll_stop(struct p9_conn *m) | ||
245 | { | ||
246 | int i; | ||
247 | struct p9_mux_poll_task *vpt; | ||
248 | |||
249 | mutex_lock(&p9_mux_task_lock); | ||
250 | vpt = m->poll_task; | ||
251 | list_del(&m->mux_list); | ||
252 | for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { | ||
253 | if (m->poll_waddr[i] != NULL) { | ||
254 | remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); | ||
255 | m->poll_waddr[i] = NULL; | ||
256 | } | ||
257 | } | ||
258 | vpt->muxnum--; | ||
259 | if (!vpt->muxnum) { | ||
260 | P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt); | ||
261 | kthread_stop(vpt->task); | ||
262 | vpt->task = NULL; | ||
263 | p9_mux_poll_task_num--; | ||
264 | } | ||
265 | p9_mux_num--; | ||
266 | mutex_unlock(&p9_mux_task_lock); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * p9_conn_create - allocate and initialize the per-session mux data | ||
271 | * Creates the polling task if this is the first session. | ||
272 | * | ||
273 | * @trans - transport structure | ||
274 | * @msize - maximum message size | ||
275 | * @extended - pointer to the extended flag | ||
276 | */ | ||
277 | struct p9_conn *p9_conn_create(struct p9_trans *trans, int msize, | ||
278 | unsigned char *extended) | ||
279 | { | ||
280 | int i, n; | ||
281 | struct p9_conn *m, *mtmp; | ||
282 | |||
283 | P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, msize); | ||
284 | m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL); | ||
285 | if (!m) | ||
286 | return ERR_PTR(-ENOMEM); | ||
287 | |||
288 | spin_lock_init(&m->lock); | ||
289 | INIT_LIST_HEAD(&m->mux_list); | ||
290 | m->msize = msize; | ||
291 | m->extended = extended; | ||
292 | m->trans = trans; | ||
293 | m->tagpool = p9_idpool_create(); | ||
294 | if (IS_ERR(m->tagpool)) { | ||
295 | mtmp = ERR_PTR(-ENOMEM); | ||
296 | kfree(m); | ||
297 | return mtmp; | ||
298 | } | ||
299 | |||
300 | m->err = 0; | ||
301 | init_waitqueue_head(&m->equeue); | ||
302 | INIT_LIST_HEAD(&m->req_list); | ||
303 | INIT_LIST_HEAD(&m->unsent_req_list); | ||
304 | m->rcall = NULL; | ||
305 | m->rpos = 0; | ||
306 | m->rbuf = NULL; | ||
307 | m->wpos = m->wsize = 0; | ||
308 | m->wbuf = NULL; | ||
309 | INIT_WORK(&m->rq, p9_read_work); | ||
310 | INIT_WORK(&m->wq, p9_write_work); | ||
311 | m->wsched = 0; | ||
312 | memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); | ||
313 | m->poll_task = NULL; | ||
314 | n = p9_mux_poll_start(m); | ||
315 | if (n) { | ||
316 | kfree(m); | ||
317 | return ERR_PTR(n); | ||
318 | } | ||
319 | |||
320 | n = trans->poll(trans, &m->pt); | ||
321 | if (n & POLLIN) { | ||
322 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); | ||
323 | set_bit(Rpending, &m->wsched); | ||
324 | } | ||
325 | |||
326 | if (n & POLLOUT) { | ||
327 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); | ||
328 | set_bit(Wpending, &m->wsched); | ||
329 | } | ||
330 | |||
331 | for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { | ||
332 | if (IS_ERR(m->poll_waddr[i])) { | ||
333 | p9_mux_poll_stop(m); | ||
334 | mtmp = (void *)m->poll_waddr; /* the error code */ | ||
335 | kfree(m); | ||
336 | m = mtmp; | ||
337 | break; | ||
338 | } | ||
339 | } | ||
340 | |||
341 | return m; | ||
342 | } | ||
343 | EXPORT_SYMBOL(p9_conn_create); | ||
344 | |||
345 | /** | ||
346 | * p9_mux_destroy - cancels all pending requests and frees mux resources | ||
347 | */ | ||
348 | void p9_conn_destroy(struct p9_conn *m) | ||
349 | { | ||
350 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, | ||
351 | m->mux_list.prev, m->mux_list.next); | ||
352 | p9_conn_cancel(m, -ECONNRESET); | ||
353 | |||
354 | if (!list_empty(&m->req_list)) { | ||
355 | /* wait until all processes waiting on this session exit */ | ||
356 | P9_DPRINTK(P9_DEBUG_MUX, | ||
357 | "mux %p waiting for empty request queue\n", m); | ||
358 | wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); | ||
359 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m, | ||
360 | list_empty(&m->req_list)); | ||
361 | } | ||
362 | |||
363 | p9_mux_poll_stop(m); | ||
364 | m->trans = NULL; | ||
365 | p9_idpool_destroy(m->tagpool); | ||
366 | kfree(m); | ||
367 | } | ||
368 | EXPORT_SYMBOL(p9_conn_destroy); | ||
369 | |||
370 | /** | ||
371 | * p9_pollwait - called by files poll operation to add v9fs-poll task | ||
372 | * to files wait queue | ||
373 | */ | ||
374 | static void | ||
375 | p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, | ||
376 | poll_table * p) | ||
377 | { | ||
378 | int i; | ||
379 | struct p9_conn *m; | ||
380 | |||
381 | m = container_of(p, struct p9_conn, pt); | ||
382 | for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) | ||
383 | if (m->poll_waddr[i] == NULL) | ||
384 | break; | ||
385 | |||
386 | if (i >= ARRAY_SIZE(m->poll_waddr)) { | ||
387 | P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); | ||
388 | return; | ||
389 | } | ||
390 | |||
391 | m->poll_waddr[i] = wait_address; | ||
392 | |||
393 | if (!wait_address) { | ||
394 | P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n"); | ||
395 | m->poll_waddr[i] = ERR_PTR(-EIO); | ||
396 | return; | ||
397 | } | ||
398 | |||
399 | init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); | ||
400 | add_wait_queue(wait_address, &m->poll_wait[i]); | ||
401 | } | ||
402 | |||
403 | /** | ||
404 | * p9_poll_mux - polls a mux and schedules read or write works if necessary | ||
405 | */ | ||
406 | static void p9_poll_mux(struct p9_conn *m) | ||
407 | { | ||
408 | int n; | ||
409 | |||
410 | if (m->err < 0) | ||
411 | return; | ||
412 | |||
413 | n = m->trans->poll(m->trans, NULL); | ||
414 | if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { | ||
415 | P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n); | ||
416 | if (n >= 0) | ||
417 | n = -ECONNRESET; | ||
418 | p9_conn_cancel(m, n); | ||
419 | } | ||
420 | |||
421 | if (n & POLLIN) { | ||
422 | set_bit(Rpending, &m->wsched); | ||
423 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); | ||
424 | if (!test_and_set_bit(Rworksched, &m->wsched)) { | ||
425 | P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); | ||
426 | queue_work(p9_mux_wq, &m->rq); | ||
427 | } | ||
428 | } | ||
429 | |||
430 | if (n & POLLOUT) { | ||
431 | set_bit(Wpending, &m->wsched); | ||
432 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); | ||
433 | if ((m->wsize || !list_empty(&m->unsent_req_list)) | ||
434 | && !test_and_set_bit(Wworksched, &m->wsched)) { | ||
435 | P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); | ||
436 | queue_work(p9_mux_wq, &m->wq); | ||
437 | } | ||
438 | } | ||
439 | } | ||
440 | |||
441 | /** | ||
442 | * p9_poll_proc - polls all v9fs transports for new events and queues | ||
443 | * the appropriate work to the work queue | ||
444 | */ | ||
445 | static int p9_poll_proc(void *a) | ||
446 | { | ||
447 | struct p9_conn *m, *mtmp; | ||
448 | struct p9_mux_poll_task *vpt; | ||
449 | |||
450 | vpt = a; | ||
451 | P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt); | ||
452 | while (!kthread_should_stop()) { | ||
453 | set_current_state(TASK_INTERRUPTIBLE); | ||
454 | |||
455 | list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { | ||
456 | p9_poll_mux(m); | ||
457 | } | ||
458 | |||
459 | P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n"); | ||
460 | schedule_timeout(SCHED_TIMEOUT * HZ); | ||
461 | } | ||
462 | |||
463 | __set_current_state(TASK_RUNNING); | ||
464 | P9_DPRINTK(P9_DEBUG_MUX, "finish\n"); | ||
465 | return 0; | ||
466 | } | ||
467 | |||
468 | /** | ||
469 | * p9_write_work - called when a transport can send some data | ||
470 | */ | ||
471 | static void p9_write_work(struct work_struct *work) | ||
472 | { | ||
473 | int n, err; | ||
474 | struct p9_conn *m; | ||
475 | struct p9_req *req; | ||
476 | |||
477 | m = container_of(work, struct p9_conn, wq); | ||
478 | |||
479 | if (m->err < 0) { | ||
480 | clear_bit(Wworksched, &m->wsched); | ||
481 | return; | ||
482 | } | ||
483 | |||
484 | if (!m->wsize) { | ||
485 | if (list_empty(&m->unsent_req_list)) { | ||
486 | clear_bit(Wworksched, &m->wsched); | ||
487 | return; | ||
488 | } | ||
489 | |||
490 | spin_lock(&m->lock); | ||
491 | again: | ||
492 | req = list_entry(m->unsent_req_list.next, struct p9_req, | ||
493 | req_list); | ||
494 | list_move_tail(&req->req_list, &m->req_list); | ||
495 | if (req->err == ERREQFLUSH) | ||
496 | goto again; | ||
497 | |||
498 | m->wbuf = req->tcall->sdata; | ||
499 | m->wsize = req->tcall->size; | ||
500 | m->wpos = 0; | ||
501 | spin_unlock(&m->lock); | ||
502 | } | ||
503 | |||
504 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, | ||
505 | m->wsize); | ||
506 | clear_bit(Wpending, &m->wsched); | ||
507 | err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); | ||
508 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err); | ||
509 | if (err == -EAGAIN) { | ||
510 | clear_bit(Wworksched, &m->wsched); | ||
511 | return; | ||
512 | } | ||
513 | |||
514 | if (err < 0) | ||
515 | goto error; | ||
516 | else if (err == 0) { | ||
517 | err = -EREMOTEIO; | ||
518 | goto error; | ||
519 | } | ||
520 | |||
521 | m->wpos += err; | ||
522 | if (m->wpos == m->wsize) | ||
523 | m->wpos = m->wsize = 0; | ||
524 | |||
525 | if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { | ||
526 | if (test_and_clear_bit(Wpending, &m->wsched)) | ||
527 | n = POLLOUT; | ||
528 | else | ||
529 | n = m->trans->poll(m->trans, NULL); | ||
530 | |||
531 | if (n & POLLOUT) { | ||
532 | P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); | ||
533 | queue_work(p9_mux_wq, &m->wq); | ||
534 | } else | ||
535 | clear_bit(Wworksched, &m->wsched); | ||
536 | } else | ||
537 | clear_bit(Wworksched, &m->wsched); | ||
538 | |||
539 | return; | ||
540 | |||
541 | error: | ||
542 | p9_conn_cancel(m, err); | ||
543 | clear_bit(Wworksched, &m->wsched); | ||
544 | } | ||
545 | |||
546 | static void process_request(struct p9_conn *m, struct p9_req *req) | ||
547 | { | ||
548 | int ecode; | ||
549 | struct p9_str *ename; | ||
550 | |||
551 | if (!req->err && req->rcall->id == P9_RERROR) { | ||
552 | ecode = req->rcall->params.rerror.errno; | ||
553 | ename = &req->rcall->params.rerror.error; | ||
554 | |||
555 | P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len, | ||
556 | ename->str); | ||
557 | |||
558 | if (*m->extended) | ||
559 | req->err = -ecode; | ||
560 | |||
561 | if (!req->err) { | ||
562 | req->err = p9_errstr2errno(ename->str, ename->len); | ||
563 | |||
564 | if (!req->err) { /* string match failed */ | ||
565 | PRINT_FCALL_ERROR("unknown error", req->rcall); | ||
566 | } | ||
567 | |||
568 | if (!req->err) | ||
569 | req->err = -ESERVERFAULT; | ||
570 | } | ||
571 | } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { | ||
572 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
573 | "fcall mismatch: expected %d, got %d\n", | ||
574 | req->tcall->id + 1, req->rcall->id); | ||
575 | if (!req->err) | ||
576 | req->err = -EIO; | ||
577 | } | ||
578 | } | ||
579 | |||
580 | /** | ||
581 | * p9_read_work - called when there is some data to be read from a transport | ||
582 | */ | ||
583 | static void p9_read_work(struct work_struct *work) | ||
584 | { | ||
585 | int n, err; | ||
586 | struct p9_conn *m; | ||
587 | struct p9_req *req, *rptr, *rreq; | ||
588 | struct p9_fcall *rcall; | ||
589 | char *rbuf; | ||
590 | |||
591 | m = container_of(work, struct p9_conn, rq); | ||
592 | |||
593 | if (m->err < 0) | ||
594 | return; | ||
595 | |||
596 | rcall = NULL; | ||
597 | P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); | ||
598 | |||
599 | if (!m->rcall) { | ||
600 | m->rcall = | ||
601 | kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL); | ||
602 | if (!m->rcall) { | ||
603 | err = -ENOMEM; | ||
604 | goto error; | ||
605 | } | ||
606 | |||
607 | m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); | ||
608 | m->rpos = 0; | ||
609 | } | ||
610 | |||
611 | clear_bit(Rpending, &m->wsched); | ||
612 | err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); | ||
613 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err); | ||
614 | if (err == -EAGAIN) { | ||
615 | clear_bit(Rworksched, &m->wsched); | ||
616 | return; | ||
617 | } | ||
618 | |||
619 | if (err <= 0) | ||
620 | goto error; | ||
621 | |||
622 | m->rpos += err; | ||
623 | while (m->rpos > 4) { | ||
624 | n = le32_to_cpu(*(__le32 *) m->rbuf); | ||
625 | if (n >= m->msize) { | ||
626 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
627 | "requested packet size too big: %d\n", n); | ||
628 | err = -EIO; | ||
629 | goto error; | ||
630 | } | ||
631 | |||
632 | if (m->rpos < n) | ||
633 | break; | ||
634 | |||
635 | err = | ||
636 | p9_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended); | ||
637 | if (err < 0) { | ||
638 | goto error; | ||
639 | } | ||
640 | |||
641 | #ifdef CONFIG_NET_9P_DEBUG | ||
642 | if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { | ||
643 | char buf[150]; | ||
644 | |||
645 | p9_printfcall(buf, sizeof(buf), m->rcall, | ||
646 | *m->extended); | ||
647 | printk(KERN_NOTICE ">>> %p %s\n", m, buf); | ||
648 | } | ||
649 | #endif | ||
650 | |||
651 | rcall = m->rcall; | ||
652 | rbuf = m->rbuf; | ||
653 | if (m->rpos > n) { | ||
654 | m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize, | ||
655 | GFP_KERNEL); | ||
656 | if (!m->rcall) { | ||
657 | err = -ENOMEM; | ||
658 | goto error; | ||
659 | } | ||
660 | |||
661 | m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); | ||
662 | memmove(m->rbuf, rbuf + n, m->rpos - n); | ||
663 | m->rpos -= n; | ||
664 | } else { | ||
665 | m->rcall = NULL; | ||
666 | m->rbuf = NULL; | ||
667 | m->rpos = 0; | ||
668 | } | ||
669 | |||
670 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, | ||
671 | rcall->id, rcall->tag); | ||
672 | |||
673 | req = NULL; | ||
674 | spin_lock(&m->lock); | ||
675 | list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { | ||
676 | if (rreq->tag == rcall->tag) { | ||
677 | req = rreq; | ||
678 | if (req->flush != Flushing) | ||
679 | list_del(&req->req_list); | ||
680 | break; | ||
681 | } | ||
682 | } | ||
683 | spin_unlock(&m->lock); | ||
684 | |||
685 | if (req) { | ||
686 | req->rcall = rcall; | ||
687 | process_request(m, req); | ||
688 | |||
689 | if (req->flush != Flushing) { | ||
690 | if (req->cb) | ||
691 | (*req->cb) (req, req->cba); | ||
692 | else | ||
693 | kfree(req->rcall); | ||
694 | |||
695 | wake_up(&m->equeue); | ||
696 | } | ||
697 | } else { | ||
698 | if (err >= 0 && rcall->id != P9_RFLUSH) | ||
699 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
700 | "unexpected response mux %p id %d tag %d\n", | ||
701 | m, rcall->id, rcall->tag); | ||
702 | kfree(rcall); | ||
703 | } | ||
704 | } | ||
705 | |||
706 | if (!list_empty(&m->req_list)) { | ||
707 | if (test_and_clear_bit(Rpending, &m->wsched)) | ||
708 | n = POLLIN; | ||
709 | else | ||
710 | n = m->trans->poll(m->trans, NULL); | ||
711 | |||
712 | if (n & POLLIN) { | ||
713 | P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); | ||
714 | queue_work(p9_mux_wq, &m->rq); | ||
715 | } else | ||
716 | clear_bit(Rworksched, &m->wsched); | ||
717 | } else | ||
718 | clear_bit(Rworksched, &m->wsched); | ||
719 | |||
720 | return; | ||
721 | |||
722 | error: | ||
723 | p9_conn_cancel(m, err); | ||
724 | clear_bit(Rworksched, &m->wsched); | ||
725 | } | ||
726 | |||
727 | /** | ||
728 | * p9_send_request - send 9P request | ||
729 | * The function can sleep until the request is scheduled for sending. | ||
730 | * The function can be interrupted. Return from the function is not | ||
731 | * a guarantee that the request is sent successfully. Can return errors | ||
732 | * that can be retrieved by PTR_ERR macros. | ||
733 | * | ||
734 | * @m: mux data | ||
735 | * @tc: request to be sent | ||
736 | * @cb: callback function to call when response is received | ||
737 | * @cba: parameter to pass to the callback function | ||
738 | */ | ||
739 | static struct p9_req *p9_send_request(struct p9_conn *m, | ||
740 | struct p9_fcall *tc, | ||
741 | p9_conn_req_callback cb, void *cba) | ||
742 | { | ||
743 | int n; | ||
744 | struct p9_req *req; | ||
745 | |||
746 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, | ||
747 | tc, tc->id); | ||
748 | if (m->err < 0) | ||
749 | return ERR_PTR(m->err); | ||
750 | |||
751 | req = kmalloc(sizeof(struct p9_req), GFP_KERNEL); | ||
752 | if (!req) | ||
753 | return ERR_PTR(-ENOMEM); | ||
754 | |||
755 | if (tc->id == P9_TVERSION) | ||
756 | n = P9_NOTAG; | ||
757 | else | ||
758 | n = p9_mux_get_tag(m); | ||
759 | |||
760 | if (n < 0) | ||
761 | return ERR_PTR(-ENOMEM); | ||
762 | |||
763 | p9_set_tag(tc, n); | ||
764 | |||
765 | #ifdef CONFIG_NET_9P_DEBUG | ||
766 | if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { | ||
767 | char buf[150]; | ||
768 | |||
769 | p9_printfcall(buf, sizeof(buf), tc, *m->extended); | ||
770 | printk(KERN_NOTICE "<<< %p %s\n", m, buf); | ||
771 | } | ||
772 | #endif | ||
773 | |||
774 | spin_lock_init(&req->lock); | ||
775 | req->tag = n; | ||
776 | req->tcall = tc; | ||
777 | req->rcall = NULL; | ||
778 | req->err = 0; | ||
779 | req->cb = cb; | ||
780 | req->cba = cba; | ||
781 | req->flush = None; | ||
782 | |||
783 | spin_lock(&m->lock); | ||
784 | list_add_tail(&req->req_list, &m->unsent_req_list); | ||
785 | spin_unlock(&m->lock); | ||
786 | |||
787 | if (test_and_clear_bit(Wpending, &m->wsched)) | ||
788 | n = POLLOUT; | ||
789 | else | ||
790 | n = m->trans->poll(m->trans, NULL); | ||
791 | |||
792 | if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) | ||
793 | queue_work(p9_mux_wq, &m->wq); | ||
794 | |||
795 | return req; | ||
796 | } | ||
797 | |||
798 | static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req) | ||
799 | { | ||
800 | p9_mux_put_tag(m, req->tag); | ||
801 | kfree(req); | ||
802 | } | ||
803 | |||
804 | static void p9_mux_flush_cb(struct p9_req *freq, void *a) | ||
805 | { | ||
806 | p9_conn_req_callback cb; | ||
807 | int tag; | ||
808 | struct p9_conn *m; | ||
809 | struct p9_req *req, *rreq, *rptr; | ||
810 | |||
811 | m = a; | ||
812 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, | ||
813 | freq->tcall, freq->rcall, freq->err, | ||
814 | freq->tcall->params.tflush.oldtag); | ||
815 | |||
816 | spin_lock(&m->lock); | ||
817 | cb = NULL; | ||
818 | tag = freq->tcall->params.tflush.oldtag; | ||
819 | req = NULL; | ||
820 | list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { | ||
821 | if (rreq->tag == tag) { | ||
822 | req = rreq; | ||
823 | list_del(&req->req_list); | ||
824 | break; | ||
825 | } | ||
826 | } | ||
827 | spin_unlock(&m->lock); | ||
828 | |||
829 | if (req) { | ||
830 | spin_lock(&req->lock); | ||
831 | req->flush = Flushed; | ||
832 | spin_unlock(&req->lock); | ||
833 | |||
834 | if (req->cb) | ||
835 | (*req->cb) (req, req->cba); | ||
836 | else | ||
837 | kfree(req->rcall); | ||
838 | |||
839 | wake_up(&m->equeue); | ||
840 | } | ||
841 | |||
842 | kfree(freq->tcall); | ||
843 | kfree(freq->rcall); | ||
844 | p9_mux_free_request(m, freq); | ||
845 | } | ||
846 | |||
847 | static int | ||
848 | p9_mux_flush_request(struct p9_conn *m, struct p9_req *req) | ||
849 | { | ||
850 | struct p9_fcall *fc; | ||
851 | struct p9_req *rreq, *rptr; | ||
852 | |||
853 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); | ||
854 | |||
855 | /* if a response was received for a request, do nothing */ | ||
856 | spin_lock(&req->lock); | ||
857 | if (req->rcall || req->err) { | ||
858 | spin_unlock(&req->lock); | ||
859 | P9_DPRINTK(P9_DEBUG_MUX, | ||
860 | "mux %p req %p response already received\n", m, req); | ||
861 | return 0; | ||
862 | } | ||
863 | |||
864 | req->flush = Flushing; | ||
865 | spin_unlock(&req->lock); | ||
866 | |||
867 | spin_lock(&m->lock); | ||
868 | /* if the request is not sent yet, just remove it from the list */ | ||
869 | list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { | ||
870 | if (rreq->tag == req->tag) { | ||
871 | P9_DPRINTK(P9_DEBUG_MUX, | ||
872 | "mux %p req %p request is not sent yet\n", m, req); | ||
873 | list_del(&rreq->req_list); | ||
874 | req->flush = Flushed; | ||
875 | spin_unlock(&m->lock); | ||
876 | if (req->cb) | ||
877 | (*req->cb) (req, req->cba); | ||
878 | return 0; | ||
879 | } | ||
880 | } | ||
881 | spin_unlock(&m->lock); | ||
882 | |||
883 | clear_thread_flag(TIF_SIGPENDING); | ||
884 | fc = p9_create_tflush(req->tag); | ||
885 | p9_send_request(m, fc, p9_mux_flush_cb, m); | ||
886 | return 1; | ||
887 | } | ||
888 | |||
889 | static void | ||
890 | p9_conn_rpc_cb(struct p9_req *req, void *a) | ||
891 | { | ||
892 | struct p9_mux_rpc *r; | ||
893 | |||
894 | P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a); | ||
895 | r = a; | ||
896 | r->rcall = req->rcall; | ||
897 | r->err = req->err; | ||
898 | |||
899 | if (req->flush != None && !req->err) | ||
900 | r->err = -ERESTARTSYS; | ||
901 | |||
902 | wake_up(&r->wqueue); | ||
903 | } | ||
904 | |||
905 | /** | ||
906 | * p9_mux_rpc - sends 9P request and waits until a response is available. | ||
907 | * The function can be interrupted. | ||
908 | * @m: mux data | ||
909 | * @tc: request to be sent | ||
910 | * @rc: pointer where a pointer to the response is stored | ||
911 | */ | ||
912 | int | ||
913 | p9_conn_rpc(struct p9_conn *m, struct p9_fcall *tc, | ||
914 | struct p9_fcall **rc) | ||
915 | { | ||
916 | int err, sigpending; | ||
917 | unsigned long flags; | ||
918 | struct p9_req *req; | ||
919 | struct p9_mux_rpc r; | ||
920 | |||
921 | r.err = 0; | ||
922 | r.tcall = tc; | ||
923 | r.rcall = NULL; | ||
924 | r.m = m; | ||
925 | init_waitqueue_head(&r.wqueue); | ||
926 | |||
927 | if (rc) | ||
928 | *rc = NULL; | ||
929 | |||
930 | sigpending = 0; | ||
931 | if (signal_pending(current)) { | ||
932 | sigpending = 1; | ||
933 | clear_thread_flag(TIF_SIGPENDING); | ||
934 | } | ||
935 | |||
936 | req = p9_send_request(m, tc, p9_conn_rpc_cb, &r); | ||
937 | if (IS_ERR(req)) { | ||
938 | err = PTR_ERR(req); | ||
939 | P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); | ||
940 | return err; | ||
941 | } | ||
942 | |||
943 | err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); | ||
944 | if (r.err < 0) | ||
945 | err = r.err; | ||
946 | |||
947 | if (err == -ERESTARTSYS && m->trans->status == Connected | ||
948 | && m->err == 0) { | ||
949 | if (p9_mux_flush_request(m, req)) { | ||
950 | /* wait until we get response of the flush message */ | ||
951 | do { | ||
952 | clear_thread_flag(TIF_SIGPENDING); | ||
953 | err = wait_event_interruptible(r.wqueue, | ||
954 | r.rcall || r.err); | ||
955 | } while (!r.rcall && !r.err && err == -ERESTARTSYS && | ||
956 | m->trans->status == Connected && !m->err); | ||
957 | |||
958 | err = -ERESTARTSYS; | ||
959 | } | ||
960 | sigpending = 1; | ||
961 | } | ||
962 | |||
963 | if (sigpending) { | ||
964 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
965 | recalc_sigpending(); | ||
966 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
967 | } | ||
968 | |||
969 | if (rc) | ||
970 | *rc = r.rcall; | ||
971 | else | ||
972 | kfree(r.rcall); | ||
973 | |||
974 | p9_mux_free_request(m, req); | ||
975 | if (err > 0) | ||
976 | err = -EIO; | ||
977 | |||
978 | return err; | ||
979 | } | ||
980 | EXPORT_SYMBOL(p9_conn_rpc); | ||
981 | |||
982 | #ifdef P9_NONBLOCK | ||
983 | /** | ||
984 | * p9_conn_rpcnb - sends 9P request without waiting for response. | ||
985 | * @m: mux data | ||
986 | * @tc: request to be sent | ||
987 | * @cb: callback function to be called when response arrives | ||
988 | * @cba: value to pass to the callback function | ||
989 | */ | ||
990 | int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, | ||
991 | p9_conn_req_callback cb, void *a) | ||
992 | { | ||
993 | int err; | ||
994 | struct p9_req *req; | ||
995 | |||
996 | req = p9_send_request(m, tc, cb, a); | ||
997 | if (IS_ERR(req)) { | ||
998 | err = PTR_ERR(req); | ||
999 | P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); | ||
1000 | return PTR_ERR(req); | ||
1001 | } | ||
1002 | |||
1003 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); | ||
1004 | return 0; | ||
1005 | } | ||
1006 | EXPORT_SYMBOL(p9_conn_rpcnb); | ||
1007 | #endif /* P9_NONBLOCK */ | ||
1008 | |||
1009 | /** | ||
1010 | * p9_conn_cancel - cancel all pending requests with error | ||
1011 | * @m: mux data | ||
1012 | * @err: error code | ||
1013 | */ | ||
1014 | void p9_conn_cancel(struct p9_conn *m, int err) | ||
1015 | { | ||
1016 | struct p9_req *req, *rtmp; | ||
1017 | LIST_HEAD(cancel_list); | ||
1018 | |||
1019 | P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); | ||
1020 | m->err = err; | ||
1021 | spin_lock(&m->lock); | ||
1022 | list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { | ||
1023 | list_move(&req->req_list, &cancel_list); | ||
1024 | } | ||
1025 | list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { | ||
1026 | list_move(&req->req_list, &cancel_list); | ||
1027 | } | ||
1028 | spin_unlock(&m->lock); | ||
1029 | |||
1030 | list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { | ||
1031 | list_del(&req->req_list); | ||
1032 | if (!req->err) | ||
1033 | req->err = err; | ||
1034 | |||
1035 | if (req->cb) | ||
1036 | (*req->cb) (req, req->cba); | ||
1037 | else | ||
1038 | kfree(req->rcall); | ||
1039 | } | ||
1040 | |||
1041 | wake_up(&m->equeue); | ||
1042 | } | ||
1043 | EXPORT_SYMBOL(p9_conn_cancel); | ||
1044 | |||
1045 | static u16 p9_mux_get_tag(struct p9_conn *m) | ||
1046 | { | ||
1047 | int tag; | ||
1048 | |||
1049 | tag = p9_idpool_get(m->tagpool); | ||
1050 | if (tag < 0) | ||
1051 | return P9_NOTAG; | ||
1052 | else | ||
1053 | return (u16) tag; | ||
1054 | } | ||
1055 | |||
1056 | static void p9_mux_put_tag(struct p9_conn *m, u16 tag) | ||
1057 | { | ||
1058 | if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool)) | ||
1059 | p9_idpool_put(tag, m->tagpool); | ||
1060 | } | ||
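The deleted mux.c implemented the per-connection request multiplexer: each outgoing T-message takes a tag from the connection's tag pool (p9_mux_get_tag), the request is queued on req_list, and when an R-message arrives p9_read_work uses the tag in the reply to find the matching p9_req. As the trans_fd.c diff below shows, this machinery is not dropped but folded into the fd transport. The userspace sketch that follows illustrates only the tag-matching idea; the demo_* names and the fixed-size table standing in for the idpool and linked list are assumptions of the sketch.

/* Userspace sketch of tag-based request/response matching: allocate a tag
 * per outstanding request, then use the tag carried in the reply to find
 * the request it answers, even when replies arrive out of order. */
#include <stdio.h>

#define DEMO_MAXREQ 16
#define DEMO_NOTAG  0xffff

struct demo_req {
	int in_use;
	unsigned short tag;
	const char *tname;      /* which T-message this slot carries */
};

static struct demo_req reqs[DEMO_MAXREQ];

static unsigned short demo_get_tag(const char *tname)
{
	for (int i = 0; i < DEMO_MAXREQ; i++) {
		if (!reqs[i].in_use) {
			reqs[i].in_use = 1;
			reqs[i].tag = (unsigned short)i;
			reqs[i].tname = tname;
			return reqs[i].tag;
		}
	}
	return DEMO_NOTAG;      /* pool exhausted */
}

static struct demo_req *demo_match_reply(unsigned short tag)
{
	for (int i = 0; i < DEMO_MAXREQ; i++)
		if (reqs[i].in_use && reqs[i].tag == tag)
			return &reqs[i];
	return NULL;            /* unexpected response */
}

int main(void)
{
	unsigned short t1 = demo_get_tag("Twalk");
	unsigned short t2 = demo_get_tag("Tread");
	struct demo_req *r;

	/* replies may arrive out of order; the tag pairs them up */
	r = demo_match_reply(t2);
	printf("tag %u answers %s\n", t2, r ? r->tname : "???");
	r = demo_match_reply(t1);
	printf("tag %u answers %s\n", t1, r ? r->tname : "???");
	return 0;
}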
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 62332ed9da4a..1aa9d5175398 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> | 6 | * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> |
7 | * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> | 7 | * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> |
8 | * Copyright (C) 2004-2007 by Eric Van Hensbergen <ericvh@gmail.com> | 8 | * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> |
9 | * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> | 9 | * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> |
10 | * | 10 | * |
11 | * This program is free software; you can redistribute it and/or modify | 11 | * This program is free software; you can redistribute it and/or modify |
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/net.h> | 30 | #include <linux/net.h> |
31 | #include <linux/ipv6.h> | 31 | #include <linux/ipv6.h> |
32 | #include <linux/kthread.h> | ||
32 | #include <linux/errno.h> | 33 | #include <linux/errno.h> |
33 | #include <linux/kernel.h> | 34 | #include <linux/kernel.h> |
34 | #include <linux/un.h> | 35 | #include <linux/un.h> |
@@ -42,7 +43,9 @@ | |||
42 | 43 | ||
43 | #define P9_PORT 564 | 44 | #define P9_PORT 564 |
44 | #define MAX_SOCK_BUF (64*1024) | 45 | #define MAX_SOCK_BUF (64*1024) |
45 | 46 | #define ERREQFLUSH 1 | |
47 | #define SCHED_TIMEOUT 10 | ||
48 | #define MAXPOLLWADDR 2 | ||
46 | 49 | ||
47 | struct p9_fd_opts { | 50 | struct p9_fd_opts { |
48 | int rfd; | 51 | int rfd; |
@@ -53,6 +56,7 @@ struct p9_fd_opts { | |||
53 | struct p9_trans_fd { | 56 | struct p9_trans_fd { |
54 | struct file *rd; | 57 | struct file *rd; |
55 | struct file *wr; | 58 | struct file *wr; |
59 | struct p9_conn *conn; | ||
56 | }; | 60 | }; |
57 | 61 | ||
58 | /* | 62 | /* |
@@ -72,6 +76,1028 @@ static match_table_t tokens = { | |||
72 | {Opt_err, NULL}, | 76 | {Opt_err, NULL}, |
73 | }; | 77 | }; |
74 | 78 | ||
79 | enum { | ||
80 | Rworksched = 1, /* read work scheduled or running */ | ||
81 | Rpending = 2, /* can read */ | ||
82 | Wworksched = 4, /* write work scheduled or running */ | ||
83 | Wpending = 8, /* can write */ | ||
84 | }; | ||
85 | |||
86 | enum { | ||
87 | None, | ||
88 | Flushing, | ||
89 | Flushed, | ||
90 | }; | ||
91 | |||
92 | struct p9_req; | ||
93 | |||
94 | typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a); | ||
95 | struct p9_req { | ||
96 | spinlock_t lock; /* protect request structure */ | ||
97 | int tag; | ||
98 | struct p9_fcall *tcall; | ||
99 | struct p9_fcall *rcall; | ||
100 | int err; | ||
101 | p9_conn_req_callback cb; | ||
102 | void *cba; | ||
103 | int flush; | ||
104 | struct list_head req_list; | ||
105 | }; | ||
106 | |||
107 | struct p9_mux_poll_task; | ||
108 | |||
109 | struct p9_conn { | ||
110 | spinlock_t lock; /* protect lock structure */ | ||
111 | struct list_head mux_list; | ||
112 | struct p9_mux_poll_task *poll_task; | ||
113 | int msize; | ||
114 | unsigned char extended; | ||
115 | struct p9_trans *trans; | ||
116 | struct p9_idpool *tagpool; | ||
117 | int err; | ||
118 | wait_queue_head_t equeue; | ||
119 | struct list_head req_list; | ||
120 | struct list_head unsent_req_list; | ||
121 | struct p9_fcall *rcall; | ||
122 | int rpos; | ||
123 | char *rbuf; | ||
124 | int wpos; | ||
125 | int wsize; | ||
126 | char *wbuf; | ||
127 | wait_queue_t poll_wait[MAXPOLLWADDR]; | ||
128 | wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; | ||
129 | poll_table pt; | ||
130 | struct work_struct rq; | ||
131 | struct work_struct wq; | ||
132 | unsigned long wsched; | ||
133 | }; | ||
134 | |||
135 | struct p9_mux_poll_task { | ||
136 | struct task_struct *task; | ||
137 | struct list_head mux_list; | ||
138 | int muxnum; | ||
139 | }; | ||
140 | |||
141 | struct p9_mux_rpc { | ||
142 | struct p9_conn *m; | ||
143 | int err; | ||
144 | struct p9_fcall *tcall; | ||
145 | struct p9_fcall *rcall; | ||
146 | wait_queue_head_t wqueue; | ||
147 | }; | ||
148 | |||
149 | static int p9_poll_proc(void *); | ||
150 | static void p9_read_work(struct work_struct *work); | ||
151 | static void p9_write_work(struct work_struct *work); | ||
152 | static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, | ||
153 | poll_table *p); | ||
154 | static int p9_fd_write(struct p9_trans *trans, void *v, int len); | ||
155 | static int p9_fd_read(struct p9_trans *trans, void *v, int len); | ||
156 | |||
157 | static DEFINE_MUTEX(p9_mux_task_lock); | ||
158 | static struct workqueue_struct *p9_mux_wq; | ||
159 | |||
160 | static int p9_mux_num; | ||
161 | static int p9_mux_poll_task_num; | ||
162 | static struct p9_mux_poll_task p9_mux_poll_tasks[100]; | ||
163 | |||
164 | static void p9_conn_destroy(struct p9_conn *); | ||
165 | static unsigned int p9_fd_poll(struct p9_trans *trans, | ||
166 | struct poll_table_struct *pt); | ||
167 | |||
168 | #ifdef P9_NONBLOCK | ||
169 | static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, | ||
170 | p9_conn_req_callback cb, void *a); | ||
171 | #endif /* P9_NONBLOCK */ | ||
172 | |||
173 | static void p9_conn_cancel(struct p9_conn *m, int err); | ||
174 | |||
175 | static int p9_mux_global_init(void) | ||
176 | { | ||
177 | int i; | ||
178 | |||
179 | for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) | ||
180 | p9_mux_poll_tasks[i].task = NULL; | ||
181 | |||
182 | p9_mux_wq = create_workqueue("v9fs"); | ||
183 | if (!p9_mux_wq) { | ||
184 | printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); | ||
185 | return -ENOMEM; | ||
186 | } | ||
187 | |||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | static u16 p9_mux_get_tag(struct p9_conn *m) | ||
192 | { | ||
193 | int tag; | ||
194 | |||
195 | tag = p9_idpool_get(m->tagpool); | ||
196 | if (tag < 0) | ||
197 | return P9_NOTAG; | ||
198 | else | ||
199 | return (u16) tag; | ||
200 | } | ||
201 | |||
202 | static void p9_mux_put_tag(struct p9_conn *m, u16 tag) | ||
203 | { | ||
204 | if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool)) | ||
205 | p9_idpool_put(tag, m->tagpool); | ||
206 | } | ||
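
The tag helpers wrap the connection's idpool so every outstanding request carries a unique 16-bit tag; P9_NOTAG is reserved for Tversion. A short usage sketch (illustrative only, with simplified error handling):

	/* Illustrative: allocate a tag for an outgoing fcall, then release it
	 * once the matching response has been consumed. */
	static int example_tag_roundtrip(struct p9_conn *m, struct p9_fcall *tc)
	{
		u16 tag = p9_mux_get_tag(m);	/* P9_NOTAG on pool exhaustion */

		if (tag == P9_NOTAG && tc->id != P9_TVERSION)
			return -ENOMEM;		/* no usable tag for a normal request */

		p9_set_tag(tc, tag);
		/* ... send tc, wait for the reply carrying the same tag ... */
		p9_mux_put_tag(m, tag);		/* no-op for P9_NOTAG */
		return 0;
	}
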
207 | |||
208 | /** | ||
209 | * p9_mux_calc_poll_procs - calculates the number of polling procs | ||
210 | * based on the number of mounted v9fs filesystems. | ||
211 | * | ||
212 | * The current implementation returns the rounded-up ratio of mounts to poll tasks, clamped to the size of the poll task array. | ||
213 | */ | ||
214 | static int p9_mux_calc_poll_procs(int muxnum) | ||
215 | { | ||
216 | int n; | ||
217 | |||
218 | if (p9_mux_poll_task_num) | ||
219 | n = muxnum / p9_mux_poll_task_num + | ||
220 | (muxnum % p9_mux_poll_task_num ? 1 : 0); | ||
221 | else | ||
222 | n = 1; | ||
223 | |||
224 | if (n > ARRAY_SIZE(p9_mux_poll_tasks)) | ||
225 | n = ARRAY_SIZE(p9_mux_poll_tasks); | ||
226 | |||
227 | return n; | ||
228 | } | ||
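
The helper is therefore a clamped ceiling division: ceil(muxnum / p9_mux_poll_task_num), limited to the size of the static poll-task array. A standalone sketch of the same arithmetic (illustrative only, with hypothetical numbers):

	/* Illustrative: the same rounded-up division, outside kernel context. */
	static int calc_poll_procs(int muxnum, int task_num, int max_tasks)
	{
		int n;

		if (task_num)
			n = muxnum / task_num + (muxnum % task_num ? 1 : 0);
		else
			n = 1;

		if (n > max_tasks)
			n = max_tasks;
		return n;
	}

	/* e.g. calc_poll_procs(5, 2, 100) == 3, i.e. ceil(5/2) clamped to 100. */
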
229 | |||
230 | static int p9_mux_poll_start(struct p9_conn *m) | ||
231 | { | ||
232 | int i, n; | ||
233 | struct p9_mux_poll_task *vpt, *vptlast; | ||
234 | struct task_struct *pproc; | ||
235 | |||
236 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num, | ||
237 | p9_mux_poll_task_num); | ||
238 | mutex_lock(&p9_mux_task_lock); | ||
239 | |||
240 | n = p9_mux_calc_poll_procs(p9_mux_num + 1); | ||
241 | if (n > p9_mux_poll_task_num) { | ||
242 | for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { | ||
243 | if (p9_mux_poll_tasks[i].task == NULL) { | ||
244 | vpt = &p9_mux_poll_tasks[i]; | ||
245 | P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n", | ||
246 | vpt); | ||
247 | pproc = kthread_create(p9_poll_proc, vpt, | ||
248 | "v9fs-poll"); | ||
249 | |||
250 | if (!IS_ERR(pproc)) { | ||
251 | vpt->task = pproc; | ||
252 | INIT_LIST_HEAD(&vpt->mux_list); | ||
253 | vpt->muxnum = 0; | ||
254 | p9_mux_poll_task_num++; | ||
255 | wake_up_process(vpt->task); | ||
256 | } | ||
257 | break; | ||
258 | } | ||
259 | } | ||
260 | |||
261 | if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) | ||
262 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
263 | "warning: no free poll slots\n"); | ||
264 | } | ||
265 | |||
266 | n = (p9_mux_num + 1) / p9_mux_poll_task_num + | ||
267 | ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0); | ||
268 | |||
269 | vptlast = NULL; | ||
270 | for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { | ||
271 | vpt = &p9_mux_poll_tasks[i]; | ||
272 | if (vpt->task != NULL) { | ||
273 | vptlast = vpt; | ||
274 | if (vpt->muxnum < n) { | ||
275 | P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); | ||
276 | list_add(&m->mux_list, &vpt->mux_list); | ||
277 | vpt->muxnum++; | ||
278 | m->poll_task = vpt; | ||
279 | memset(&m->poll_waddr, 0, | ||
280 | sizeof(m->poll_waddr)); | ||
281 | init_poll_funcptr(&m->pt, p9_pollwait); | ||
282 | break; | ||
283 | } | ||
284 | } | ||
285 | } | ||
286 | |||
287 | if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) { | ||
288 | if (vptlast == NULL) { | ||
289 | mutex_unlock(&p9_mux_task_lock); | ||
290 | return -ENOMEM; | ||
291 | } | ||
292 | |||
293 | P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); | ||
294 | list_add(&m->mux_list, &vptlast->mux_list); | ||
295 | vptlast->muxnum++; | ||
296 | m->poll_task = vptlast; | ||
297 | memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); | ||
298 | init_poll_funcptr(&m->pt, p9_pollwait); | ||
299 | } | ||
300 | |||
301 | p9_mux_num++; | ||
302 | mutex_unlock(&p9_mux_task_lock); | ||
303 | |||
304 | return 0; | ||
305 | } | ||
306 | |||
307 | static void p9_mux_poll_stop(struct p9_conn *m) | ||
308 | { | ||
309 | int i; | ||
310 | struct p9_mux_poll_task *vpt; | ||
311 | |||
312 | mutex_lock(&p9_mux_task_lock); | ||
313 | vpt = m->poll_task; | ||
314 | list_del(&m->mux_list); | ||
315 | for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { | ||
316 | if (m->poll_waddr[i] != NULL) { | ||
317 | remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); | ||
318 | m->poll_waddr[i] = NULL; | ||
319 | } | ||
320 | } | ||
321 | vpt->muxnum--; | ||
322 | if (!vpt->muxnum) { | ||
323 | P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt); | ||
324 | kthread_stop(vpt->task); | ||
325 | vpt->task = NULL; | ||
326 | p9_mux_poll_task_num--; | ||
327 | } | ||
328 | p9_mux_num--; | ||
329 | mutex_unlock(&p9_mux_task_lock); | ||
330 | } | ||
331 | |||
332 | /** | ||
333 | * p9_conn_create - allocate and initialize the per-session mux data | ||
334 | * Creates the polling task if this is the first session. | ||
335 | * | ||
336 | * @trans - transport structure | ||
337 | * @msize - maximum message size (taken from @trans) | ||
338 | * @extended - extended (9P2000.u) flag (taken from @trans) | ||
339 | */ | ||
340 | static struct p9_conn *p9_conn_create(struct p9_trans *trans) | ||
341 | { | ||
342 | int i, n; | ||
343 | struct p9_conn *m, *mtmp; | ||
344 | |||
345 | P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, | ||
346 | trans->msize); | ||
347 | m = kmalloc(sizeof(struct p9_conn), GFP_KERNEL); | ||
348 | if (!m) | ||
349 | return ERR_PTR(-ENOMEM); | ||
350 | |||
351 | spin_lock_init(&m->lock); | ||
352 | INIT_LIST_HEAD(&m->mux_list); | ||
353 | m->msize = trans->msize; | ||
354 | m->extended = trans->extended; | ||
355 | m->trans = trans; | ||
356 | m->tagpool = p9_idpool_create(); | ||
357 | if (IS_ERR(m->tagpool)) { | ||
358 | mtmp = ERR_PTR(-ENOMEM); | ||
359 | kfree(m); | ||
360 | return mtmp; | ||
361 | } | ||
362 | |||
363 | m->err = 0; | ||
364 | init_waitqueue_head(&m->equeue); | ||
365 | INIT_LIST_HEAD(&m->req_list); | ||
366 | INIT_LIST_HEAD(&m->unsent_req_list); | ||
367 | m->rcall = NULL; | ||
368 | m->rpos = 0; | ||
369 | m->rbuf = NULL; | ||
370 | m->wpos = m->wsize = 0; | ||
371 | m->wbuf = NULL; | ||
372 | INIT_WORK(&m->rq, p9_read_work); | ||
373 | INIT_WORK(&m->wq, p9_write_work); | ||
374 | m->wsched = 0; | ||
375 | memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); | ||
376 | m->poll_task = NULL; | ||
377 | n = p9_mux_poll_start(m); | ||
378 | if (n) { | ||
379 | kfree(m); | ||
380 | return ERR_PTR(n); | ||
381 | } | ||
382 | |||
383 | n = p9_fd_poll(trans, &m->pt); | ||
384 | if (n & POLLIN) { | ||
385 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); | ||
386 | set_bit(Rpending, &m->wsched); | ||
387 | } | ||
388 | |||
389 | if (n & POLLOUT) { | ||
390 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); | ||
391 | set_bit(Wpending, &m->wsched); | ||
392 | } | ||
393 | |||
394 | for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { | ||
395 | if (IS_ERR(m->poll_waddr[i])) { | ||
396 | p9_mux_poll_stop(m); | ||
397 | mtmp = (void *)m->poll_waddr[i]; /* the error code */ | ||
398 | kfree(m); | ||
399 | m = mtmp; | ||
400 | break; | ||
401 | } | ||
402 | } | ||
403 | |||
404 | return m; | ||
405 | } | ||
406 | |||
407 | /** | ||
408 | * p9_conn_destroy - cancels all pending requests and frees mux resources | ||
409 | */ | ||
410 | static void p9_conn_destroy(struct p9_conn *m) | ||
411 | { | ||
412 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, | ||
413 | m->mux_list.prev, m->mux_list.next); | ||
414 | p9_conn_cancel(m, -ECONNRESET); | ||
415 | |||
416 | if (!list_empty(&m->req_list)) { | ||
417 | /* wait until all processes waiting on this session exit */ | ||
418 | P9_DPRINTK(P9_DEBUG_MUX, | ||
419 | "mux %p waiting for empty request queue\n", m); | ||
420 | wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000); | ||
421 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p request queue empty: %d\n", m, | ||
422 | list_empty(&m->req_list)); | ||
423 | } | ||
424 | |||
425 | p9_mux_poll_stop(m); | ||
426 | m->trans = NULL; | ||
427 | p9_idpool_destroy(m->tagpool); | ||
428 | kfree(m); | ||
429 | } | ||
430 | |||
431 | /** | ||
432 | * p9_pollwait - called by the file's poll operation to add the v9fs-poll task | ||
433 | * to the file's wait queue | ||
434 | */ | ||
435 | static void | ||
436 | p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) | ||
437 | { | ||
438 | int i; | ||
439 | struct p9_conn *m; | ||
440 | |||
441 | m = container_of(p, struct p9_conn, pt); | ||
442 | for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) | ||
443 | if (m->poll_waddr[i] == NULL) | ||
444 | break; | ||
445 | |||
446 | if (i >= ARRAY_SIZE(m->poll_waddr)) { | ||
447 | P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); | ||
448 | return; | ||
449 | } | ||
450 | |||
451 | m->poll_waddr[i] = wait_address; | ||
452 | |||
453 | if (!wait_address) { | ||
454 | P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n"); | ||
455 | m->poll_waddr[i] = ERR_PTR(-EIO); | ||
456 | return; | ||
457 | } | ||
458 | |||
459 | init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); | ||
460 | add_wait_queue(wait_address, &m->poll_wait[i]); | ||
461 | } | ||
462 | |||
463 | /** | ||
464 | * p9_poll_mux - polls a mux and schedules read or write works if necessary | ||
465 | */ | ||
466 | static void p9_poll_mux(struct p9_conn *m) | ||
467 | { | ||
468 | int n; | ||
469 | |||
470 | if (m->err < 0) | ||
471 | return; | ||
472 | |||
473 | n = p9_fd_poll(m->trans, NULL); | ||
474 | if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { | ||
475 | P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n); | ||
476 | if (n >= 0) | ||
477 | n = -ECONNRESET; | ||
478 | p9_conn_cancel(m, n); | ||
479 | } | ||
480 | |||
481 | if (n & POLLIN) { | ||
482 | set_bit(Rpending, &m->wsched); | ||
483 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); | ||
484 | if (!test_and_set_bit(Rworksched, &m->wsched)) { | ||
485 | P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); | ||
486 | queue_work(p9_mux_wq, &m->rq); | ||
487 | } | ||
488 | } | ||
489 | |||
490 | if (n & POLLOUT) { | ||
491 | set_bit(Wpending, &m->wsched); | ||
492 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); | ||
493 | if ((m->wsize || !list_empty(&m->unsent_req_list)) | ||
494 | && !test_and_set_bit(Wworksched, &m->wsched)) { | ||
495 | P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); | ||
496 | queue_work(p9_mux_wq, &m->wq); | ||
497 | } | ||
498 | } | ||
499 | } | ||
500 | |||
501 | /** | ||
502 | * p9_poll_proc - polls all v9fs transports for new events and queues | ||
503 | * the appropriate work to the work queue | ||
504 | */ | ||
505 | static int p9_poll_proc(void *a) | ||
506 | { | ||
507 | struct p9_conn *m, *mtmp; | ||
508 | struct p9_mux_poll_task *vpt; | ||
509 | |||
510 | vpt = a; | ||
511 | P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt); | ||
512 | while (!kthread_should_stop()) { | ||
513 | set_current_state(TASK_INTERRUPTIBLE); | ||
514 | |||
515 | list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { | ||
516 | p9_poll_mux(m); | ||
517 | } | ||
518 | |||
519 | P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n"); | ||
520 | schedule_timeout(SCHED_TIMEOUT * HZ); | ||
521 | } | ||
522 | |||
523 | __set_current_state(TASK_RUNNING); | ||
524 | P9_DPRINTK(P9_DEBUG_MUX, "finish\n"); | ||
525 | return 0; | ||
526 | } | ||
527 | |||
528 | /** | ||
529 | * p9_write_work - called when a transport can send some data | ||
530 | */ | ||
531 | static void p9_write_work(struct work_struct *work) | ||
532 | { | ||
533 | int n, err; | ||
534 | struct p9_conn *m; | ||
535 | struct p9_req *req; | ||
536 | |||
537 | m = container_of(work, struct p9_conn, wq); | ||
538 | |||
539 | if (m->err < 0) { | ||
540 | clear_bit(Wworksched, &m->wsched); | ||
541 | return; | ||
542 | } | ||
543 | |||
544 | if (!m->wsize) { | ||
545 | if (list_empty(&m->unsent_req_list)) { | ||
546 | clear_bit(Wworksched, &m->wsched); | ||
547 | return; | ||
548 | } | ||
549 | |||
550 | spin_lock(&m->lock); | ||
551 | again: | ||
552 | req = list_entry(m->unsent_req_list.next, struct p9_req, | ||
553 | req_list); | ||
554 | list_move_tail(&req->req_list, &m->req_list); | ||
555 | if (req->err == ERREQFLUSH) | ||
556 | goto again; | ||
557 | |||
558 | m->wbuf = req->tcall->sdata; | ||
559 | m->wsize = req->tcall->size; | ||
560 | m->wpos = 0; | ||
561 | spin_unlock(&m->lock); | ||
562 | } | ||
563 | |||
564 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, | ||
565 | m->wsize); | ||
566 | clear_bit(Wpending, &m->wsched); | ||
567 | err = p9_fd_write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); | ||
568 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err); | ||
569 | if (err == -EAGAIN) { | ||
570 | clear_bit(Wworksched, &m->wsched); | ||
571 | return; | ||
572 | } | ||
573 | |||
574 | if (err < 0) | ||
575 | goto error; | ||
576 | else if (err == 0) { | ||
577 | err = -EREMOTEIO; | ||
578 | goto error; | ||
579 | } | ||
580 | |||
581 | m->wpos += err; | ||
582 | if (m->wpos == m->wsize) | ||
583 | m->wpos = m->wsize = 0; | ||
584 | |||
585 | if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { | ||
586 | if (test_and_clear_bit(Wpending, &m->wsched)) | ||
587 | n = POLLOUT; | ||
588 | else | ||
589 | n = p9_fd_poll(m->trans, NULL); | ||
590 | |||
591 | if (n & POLLOUT) { | ||
592 | P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); | ||
593 | queue_work(p9_mux_wq, &m->wq); | ||
594 | } else | ||
595 | clear_bit(Wworksched, &m->wsched); | ||
596 | } else | ||
597 | clear_bit(Wworksched, &m->wsched); | ||
598 | |||
599 | return; | ||
600 | |||
601 | error: | ||
602 | p9_conn_cancel(m, err); | ||
603 | clear_bit(Wworksched, &m->wsched); | ||
604 | } | ||
605 | |||
606 | static void process_request(struct p9_conn *m, struct p9_req *req) | ||
607 | { | ||
608 | int ecode; | ||
609 | struct p9_str *ename; | ||
610 | |||
611 | if (!req->err && req->rcall->id == P9_RERROR) { | ||
612 | ecode = req->rcall->params.rerror.errno; | ||
613 | ename = &req->rcall->params.rerror.error; | ||
614 | |||
615 | P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len, | ||
616 | ename->str); | ||
617 | |||
618 | if (m->extended) | ||
619 | req->err = -ecode; | ||
620 | |||
621 | if (!req->err) { | ||
622 | req->err = p9_errstr2errno(ename->str, ename->len); | ||
623 | |||
624 | /* string match failed */ | ||
625 | if (!req->err) { | ||
626 | PRINT_FCALL_ERROR("unknown error", req->rcall); | ||
627 | req->err = -ESERVERFAULT; | ||
628 | } | ||
629 | } | ||
630 | } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { | ||
631 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
632 | "fcall mismatch: expected %d, got %d\n", | ||
633 | req->tcall->id + 1, req->rcall->id); | ||
634 | if (!req->err) | ||
635 | req->err = -EIO; | ||
636 | } | ||
637 | } | ||
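
process_request relies on the 9P numbering convention that every reply type is its request type plus one (Tversion is 100 and Rversion is 101, for example), so a response that is neither an Rerror nor tcall->id + 1 indicates a protocol violation. A small illustration of that pairing rule (the EX_* names are examples, not symbols from this patch):

	/* Illustrative: the T/R pairing rule used by the mismatch check above. */
	#define EX_TVERSION 100	/* request */
	#define EX_RVERSION 101	/* matching reply: request id + 1 */
	#define EX_TWALK    110
	#define EX_RWALK    111

	static int reply_matches(int tcall_id, int rcall_id)
	{
		return rcall_id == tcall_id + 1;	/* e.g. 101 == 100 + 1 */
	}
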
638 | |||
639 | /** | ||
640 | * p9_read_work - called when there is some data to be read from a transport | ||
641 | */ | ||
642 | static void p9_read_work(struct work_struct *work) | ||
643 | { | ||
644 | int n, err; | ||
645 | struct p9_conn *m; | ||
646 | struct p9_req *req, *rptr, *rreq; | ||
647 | struct p9_fcall *rcall; | ||
648 | char *rbuf; | ||
649 | |||
650 | m = container_of(work, struct p9_conn, rq); | ||
651 | |||
652 | if (m->err < 0) | ||
653 | return; | ||
654 | |||
655 | rcall = NULL; | ||
656 | P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); | ||
657 | |||
658 | if (!m->rcall) { | ||
659 | m->rcall = | ||
660 | kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL); | ||
661 | if (!m->rcall) { | ||
662 | err = -ENOMEM; | ||
663 | goto error; | ||
664 | } | ||
665 | |||
666 | m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); | ||
667 | m->rpos = 0; | ||
668 | } | ||
669 | |||
670 | clear_bit(Rpending, &m->wsched); | ||
671 | err = p9_fd_read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); | ||
672 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err); | ||
673 | if (err == -EAGAIN) { | ||
674 | clear_bit(Rworksched, &m->wsched); | ||
675 | return; | ||
676 | } | ||
677 | |||
678 | if (err <= 0) | ||
679 | goto error; | ||
680 | |||
681 | m->rpos += err; | ||
682 | while (m->rpos > 4) { | ||
683 | n = le32_to_cpu(*(__le32 *) m->rbuf); | ||
684 | if (n >= m->msize) { | ||
685 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
686 | "requested packet size too big: %d\n", n); | ||
687 | err = -EIO; | ||
688 | goto error; | ||
689 | } | ||
690 | |||
691 | if (m->rpos < n) | ||
692 | break; | ||
693 | |||
694 | err = | ||
695 | p9_deserialize_fcall(m->rbuf, n, m->rcall, m->extended); | ||
696 | if (err < 0) | ||
697 | goto error; | ||
698 | |||
699 | #ifdef CONFIG_NET_9P_DEBUG | ||
700 | if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { | ||
701 | char buf[150]; | ||
702 | |||
703 | p9_printfcall(buf, sizeof(buf), m->rcall, | ||
704 | m->extended); | ||
705 | printk(KERN_NOTICE ">>> %p %s\n", m, buf); | ||
706 | } | ||
707 | #endif | ||
708 | |||
709 | rcall = m->rcall; | ||
710 | rbuf = m->rbuf; | ||
711 | if (m->rpos > n) { | ||
712 | m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize, | ||
713 | GFP_KERNEL); | ||
714 | if (!m->rcall) { | ||
715 | err = -ENOMEM; | ||
716 | goto error; | ||
717 | } | ||
718 | |||
719 | m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); | ||
720 | memmove(m->rbuf, rbuf + n, m->rpos - n); | ||
721 | m->rpos -= n; | ||
722 | } else { | ||
723 | m->rcall = NULL; | ||
724 | m->rbuf = NULL; | ||
725 | m->rpos = 0; | ||
726 | } | ||
727 | |||
728 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, | ||
729 | rcall->id, rcall->tag); | ||
730 | |||
731 | req = NULL; | ||
732 | spin_lock(&m->lock); | ||
733 | list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { | ||
734 | if (rreq->tag == rcall->tag) { | ||
735 | req = rreq; | ||
736 | if (req->flush != Flushing) | ||
737 | list_del(&req->req_list); | ||
738 | break; | ||
739 | } | ||
740 | } | ||
741 | spin_unlock(&m->lock); | ||
742 | |||
743 | if (req) { | ||
744 | req->rcall = rcall; | ||
745 | process_request(m, req); | ||
746 | |||
747 | if (req->flush != Flushing) { | ||
748 | if (req->cb) | ||
749 | (*req->cb) (req, req->cba); | ||
750 | else | ||
751 | kfree(req->rcall); | ||
752 | |||
753 | wake_up(&m->equeue); | ||
754 | } | ||
755 | } else { | ||
756 | if (err >= 0 && rcall->id != P9_RFLUSH) | ||
757 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
758 | "unexpected response mux %p id %d tag %d\n", | ||
759 | m, rcall->id, rcall->tag); | ||
760 | kfree(rcall); | ||
761 | } | ||
762 | } | ||
763 | |||
764 | if (!list_empty(&m->req_list)) { | ||
765 | if (test_and_clear_bit(Rpending, &m->wsched)) | ||
766 | n = POLLIN; | ||
767 | else | ||
768 | n = p9_fd_poll(m->trans, NULL); | ||
769 | |||
770 | if (n & POLLIN) { | ||
771 | P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); | ||
772 | queue_work(p9_mux_wq, &m->rq); | ||
773 | } else | ||
774 | clear_bit(Rworksched, &m->wsched); | ||
775 | } else | ||
776 | clear_bit(Rworksched, &m->wsched); | ||
777 | |||
778 | return; | ||
779 | |||
780 | error: | ||
781 | p9_conn_cancel(m, err); | ||
782 | clear_bit(Rworksched, &m->wsched); | ||
783 | } | ||
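
The read worker treats the stream as length-prefixed 9P messages: the first four bytes of every fcall are a little-endian size that includes the prefix itself, so the loop only deserializes once at least that many bytes have accumulated and carries any trailing partial message over into a fresh buffer. A minimal standalone sketch of that framing rule (illustrative, not the kernel code path):

	#include <stdint.h>
	#include <stddef.h>

	/* Illustrative: decide whether buf[0..len) holds one complete 9P
	 * message; *msg_size gets the little-endian length prefix. */
	static int have_complete_msg(const unsigned char *buf, size_t len,
				     uint32_t *msg_size)
	{
		uint32_t n;

		if (len < 4)
			return 0;		/* size prefix not yet complete */

		/* 9P puts a 4-byte little-endian total length first. */
		n = (uint32_t)buf[0] | (uint32_t)buf[1] << 8 |
		    (uint32_t)buf[2] << 16 | (uint32_t)buf[3] << 24;

		*msg_size = n;
		return len >= n;		/* complete once n bytes arrived */
	}
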
784 | |||
785 | /** | ||
786 | * p9_send_request - send 9P request | ||
787 | * The function can sleep until the request is scheduled for sending. | ||
788 | * The function can be interrupted. A successful return does not | ||
789 | * guarantee that the request was actually sent. Errors are returned | ||
790 | * as ERR_PTR-encoded values and can be retrieved with PTR_ERR(). | ||
791 | * | ||
792 | * @m: mux data | ||
793 | * @tc: request to be sent | ||
794 | * @cb: callback function to call when response is received | ||
795 | * @cba: parameter to pass to the callback function | ||
796 | */ | ||
797 | static struct p9_req *p9_send_request(struct p9_conn *m, | ||
798 | struct p9_fcall *tc, | ||
799 | p9_conn_req_callback cb, void *cba) | ||
800 | { | ||
801 | int n; | ||
802 | struct p9_req *req; | ||
803 | |||
804 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, | ||
805 | tc, tc->id); | ||
806 | if (m->err < 0) | ||
807 | return ERR_PTR(m->err); | ||
808 | |||
809 | req = kmalloc(sizeof(struct p9_req), GFP_KERNEL); | ||
810 | if (!req) | ||
811 | return ERR_PTR(-ENOMEM); | ||
812 | |||
813 | if (tc->id == P9_TVERSION) | ||
814 | n = P9_NOTAG; | ||
815 | else | ||
816 | n = p9_mux_get_tag(m); | ||
817 | |||
818 | if (n < 0) | ||
819 | return ERR_PTR(-ENOMEM); | ||
820 | |||
821 | p9_set_tag(tc, n); | ||
822 | |||
823 | #ifdef CONFIG_NET_9P_DEBUG | ||
824 | if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { | ||
825 | char buf[150]; | ||
826 | |||
827 | p9_printfcall(buf, sizeof(buf), tc, m->extended); | ||
828 | printk(KERN_NOTICE "<<< %p %s\n", m, buf); | ||
829 | } | ||
830 | #endif | ||
831 | |||
832 | spin_lock_init(&req->lock); | ||
833 | req->tag = n; | ||
834 | req->tcall = tc; | ||
835 | req->rcall = NULL; | ||
836 | req->err = 0; | ||
837 | req->cb = cb; | ||
838 | req->cba = cba; | ||
839 | req->flush = None; | ||
840 | |||
841 | spin_lock(&m->lock); | ||
842 | list_add_tail(&req->req_list, &m->unsent_req_list); | ||
843 | spin_unlock(&m->lock); | ||
844 | |||
845 | if (test_and_clear_bit(Wpending, &m->wsched)) | ||
846 | n = POLLOUT; | ||
847 | else | ||
848 | n = p9_fd_poll(m->trans, NULL); | ||
849 | |||
850 | if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) | ||
851 | queue_work(p9_mux_wq, &m->wq); | ||
852 | |||
853 | return req; | ||
854 | } | ||
855 | |||
856 | static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req) | ||
857 | { | ||
858 | p9_mux_put_tag(m, req->tag); | ||
859 | kfree(req); | ||
860 | } | ||
861 | |||
862 | static void p9_mux_flush_cb(struct p9_req *freq, void *a) | ||
863 | { | ||
864 | p9_conn_req_callback cb; | ||
865 | int tag; | ||
866 | struct p9_conn *m; | ||
867 | struct p9_req *req, *rreq, *rptr; | ||
868 | |||
869 | m = a; | ||
870 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, | ||
871 | freq->tcall, freq->rcall, freq->err, | ||
872 | freq->tcall->params.tflush.oldtag); | ||
873 | |||
874 | spin_lock(&m->lock); | ||
875 | cb = NULL; | ||
876 | tag = freq->tcall->params.tflush.oldtag; | ||
877 | req = NULL; | ||
878 | list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { | ||
879 | if (rreq->tag == tag) { | ||
880 | req = rreq; | ||
881 | list_del(&req->req_list); | ||
882 | break; | ||
883 | } | ||
884 | } | ||
885 | spin_unlock(&m->lock); | ||
886 | |||
887 | if (req) { | ||
888 | spin_lock(&req->lock); | ||
889 | req->flush = Flushed; | ||
890 | spin_unlock(&req->lock); | ||
891 | |||
892 | if (req->cb) | ||
893 | (*req->cb) (req, req->cba); | ||
894 | else | ||
895 | kfree(req->rcall); | ||
896 | |||
897 | wake_up(&m->equeue); | ||
898 | } | ||
899 | |||
900 | kfree(freq->tcall); | ||
901 | kfree(freq->rcall); | ||
902 | p9_mux_free_request(m, freq); | ||
903 | } | ||
904 | |||
905 | static int | ||
906 | p9_mux_flush_request(struct p9_conn *m, struct p9_req *req) | ||
907 | { | ||
908 | struct p9_fcall *fc; | ||
909 | struct p9_req *rreq, *rptr; | ||
910 | |||
911 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); | ||
912 | |||
913 | /* if a response was received for a request, do nothing */ | ||
914 | spin_lock(&req->lock); | ||
915 | if (req->rcall || req->err) { | ||
916 | spin_unlock(&req->lock); | ||
917 | P9_DPRINTK(P9_DEBUG_MUX, | ||
918 | "mux %p req %p response already received\n", m, req); | ||
919 | return 0; | ||
920 | } | ||
921 | |||
922 | req->flush = Flushing; | ||
923 | spin_unlock(&req->lock); | ||
924 | |||
925 | spin_lock(&m->lock); | ||
926 | /* if the request is not sent yet, just remove it from the list */ | ||
927 | list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { | ||
928 | if (rreq->tag == req->tag) { | ||
929 | P9_DPRINTK(P9_DEBUG_MUX, | ||
930 | "mux %p req %p request is not sent yet\n", m, req); | ||
931 | list_del(&rreq->req_list); | ||
932 | req->flush = Flushed; | ||
933 | spin_unlock(&m->lock); | ||
934 | if (req->cb) | ||
935 | (*req->cb) (req, req->cba); | ||
936 | return 0; | ||
937 | } | ||
938 | } | ||
939 | spin_unlock(&m->lock); | ||
940 | |||
941 | clear_thread_flag(TIF_SIGPENDING); | ||
942 | fc = p9_create_tflush(req->tag); | ||
943 | p9_send_request(m, fc, p9_mux_flush_cb, m); | ||
944 | return 1; | ||
945 | } | ||
946 | |||
947 | static void | ||
948 | p9_conn_rpc_cb(struct p9_req *req, void *a) | ||
949 | { | ||
950 | struct p9_mux_rpc *r; | ||
951 | |||
952 | P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a); | ||
953 | r = a; | ||
954 | r->rcall = req->rcall; | ||
955 | r->err = req->err; | ||
956 | |||
957 | if (req->flush != None && !req->err) | ||
958 | r->err = -ERESTARTSYS; | ||
959 | |||
960 | wake_up(&r->wqueue); | ||
961 | } | ||
962 | |||
963 | /** | ||
964 | * p9_fd_rpc - sends a 9P request and waits until a response is available. | ||
965 | * The function can be interrupted. | ||
966 | * @t: transport data | ||
967 | * @tc: request to be sent | ||
968 | * @rc: pointer where a pointer to the response is stored | ||
969 | */ | ||
970 | int | ||
971 | p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) | ||
972 | { | ||
973 | struct p9_trans_fd *p = t->priv; | ||
974 | struct p9_conn *m = p->conn; | ||
975 | int err, sigpending; | ||
976 | unsigned long flags; | ||
977 | struct p9_req *req; | ||
978 | struct p9_mux_rpc r; | ||
979 | |||
980 | r.err = 0; | ||
981 | r.tcall = tc; | ||
982 | r.rcall = NULL; | ||
983 | r.m = m; | ||
984 | init_waitqueue_head(&r.wqueue); | ||
985 | |||
986 | if (rc) | ||
987 | *rc = NULL; | ||
988 | |||
989 | sigpending = 0; | ||
990 | if (signal_pending(current)) { | ||
991 | sigpending = 1; | ||
992 | clear_thread_flag(TIF_SIGPENDING); | ||
993 | } | ||
994 | |||
995 | req = p9_send_request(m, tc, p9_conn_rpc_cb, &r); | ||
996 | if (IS_ERR(req)) { | ||
997 | err = PTR_ERR(req); | ||
998 | P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); | ||
999 | return err; | ||
1000 | } | ||
1001 | |||
1002 | err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); | ||
1003 | if (r.err < 0) | ||
1004 | err = r.err; | ||
1005 | |||
1006 | if (err == -ERESTARTSYS && m->trans->status == Connected | ||
1007 | && m->err == 0) { | ||
1008 | if (p9_mux_flush_request(m, req)) { | ||
1009 | /* wait until we get response of the flush message */ | ||
1010 | do { | ||
1011 | clear_thread_flag(TIF_SIGPENDING); | ||
1012 | err = wait_event_interruptible(r.wqueue, | ||
1013 | r.rcall || r.err); | ||
1014 | } while (!r.rcall && !r.err && err == -ERESTARTSYS && | ||
1015 | m->trans->status == Connected && !m->err); | ||
1016 | |||
1017 | err = -ERESTARTSYS; | ||
1018 | } | ||
1019 | sigpending = 1; | ||
1020 | } | ||
1021 | |||
1022 | if (sigpending) { | ||
1023 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
1024 | recalc_sigpending(); | ||
1025 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
1026 | } | ||
1027 | |||
1028 | if (rc) | ||
1029 | *rc = r.rcall; | ||
1030 | else | ||
1031 | kfree(r.rcall); | ||
1032 | |||
1033 | p9_mux_free_request(m, req); | ||
1034 | if (err > 0) | ||
1035 | err = -EIO; | ||
1036 | |||
1037 | return err; | ||
1038 | } | ||
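
p9_fd_rpc is what the fd transport now plugs into trans->rpc: the client builds a tcall, invokes the transport's rpc op, and owns the rcall that comes back. A hedged sketch of how a caller is expected to use the op (p9_create_tversion is assumed from the conv.c helpers; error handling is abbreviated):

	/* Illustrative: issuing one synchronous 9P call through trans->rpc. */
	static int example_version_rpc(struct p9_trans *trans)
	{
		struct p9_fcall *tc, *rc = NULL;
		int err;

		tc = p9_create_tversion(trans->msize, "9P2000.u");
		if (IS_ERR(tc))
			return PTR_ERR(tc);

		err = trans->rpc(trans, tc, &rc);	/* p9_fd_rpc for fd transports */
		kfree(tc);
		kfree(rc);				/* caller owns the reply */
		return err;
	}
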
1039 | |||
1040 | #ifdef P9_NONBLOCK | ||
1041 | /** | ||
1042 | * p9_conn_rpcnb - sends 9P request without waiting for response. | ||
1043 | * @m: mux data | ||
1044 | * @tc: request to be sent | ||
1045 | * @cb: callback function to be called when response arrives | ||
1046 | * @cba: value to pass to the callback function | ||
1047 | */ | ||
1048 | int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, | ||
1049 | p9_conn_req_callback cb, void *a) | ||
1050 | { | ||
1051 | int err; | ||
1052 | struct p9_req *req; | ||
1053 | |||
1054 | req = p9_send_request(m, tc, cb, a); | ||
1055 | if (IS_ERR(req)) { | ||
1056 | err = PTR_ERR(req); | ||
1057 | P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); | ||
1058 | return PTR_ERR(req); | ||
1059 | } | ||
1060 | |||
1061 | P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); | ||
1062 | return 0; | ||
1063 | } | ||
1064 | #endif /* P9_NONBLOCK */ | ||
1065 | |||
1066 | /** | ||
1067 | * p9_conn_cancel - cancel all pending requests with error | ||
1068 | * @m: mux data | ||
1069 | * @err: error code | ||
1070 | */ | ||
1071 | void p9_conn_cancel(struct p9_conn *m, int err) | ||
1072 | { | ||
1073 | struct p9_req *req, *rtmp; | ||
1074 | LIST_HEAD(cancel_list); | ||
1075 | |||
1076 | P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); | ||
1077 | m->err = err; | ||
1078 | spin_lock(&m->lock); | ||
1079 | list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { | ||
1080 | list_move(&req->req_list, &cancel_list); | ||
1081 | } | ||
1082 | list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { | ||
1083 | list_move(&req->req_list, &cancel_list); | ||
1084 | } | ||
1085 | spin_unlock(&m->lock); | ||
1086 | |||
1087 | list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { | ||
1088 | list_del(&req->req_list); | ||
1089 | if (!req->err) | ||
1090 | req->err = err; | ||
1091 | |||
1092 | if (req->cb) | ||
1093 | (*req->cb) (req, req->cba); | ||
1094 | else | ||
1095 | kfree(req->rcall); | ||
1096 | } | ||
1097 | |||
1098 | wake_up(&m->equeue); | ||
1099 | } | ||
1100 | |||
75 | /** | 1101 | /** |
76 | * v9fs_parse_options - parse mount options into session structure | 1102 | * v9fs_parse_options - parse mount options into session structure |
77 | * @options: options string passed from mount | 1103 | * @options: options string passed from mount |
@@ -268,7 +1294,7 @@ end: | |||
268 | } | 1294 | } |
269 | 1295 | ||
270 | /** | 1296 | /** |
271 | * p9_sock_close - shutdown socket | 1297 | * p9_fd_close - shutdown socket |
272 | * @trans: private socket structure | 1298 | * @trans: private socket structure |
273 | * | 1299 | * |
274 | */ | 1300 | */ |
@@ -284,6 +1310,8 @@ static void p9_fd_close(struct p9_trans *trans) | |||
284 | if (!ts) | 1310 | if (!ts) |
285 | return; | 1311 | return; |
286 | 1312 | ||
1313 | p9_conn_destroy(ts->conn); | ||
1314 | |||
287 | trans->status = Disconnected; | 1315 | trans->status = Disconnected; |
288 | if (ts->rd) | 1316 | if (ts->rd) |
289 | fput(ts->rd); | 1317 | fput(ts->rd); |
@@ -292,13 +1320,15 @@ static void p9_fd_close(struct p9_trans *trans) | |||
292 | kfree(ts); | 1320 | kfree(ts); |
293 | } | 1321 | } |
294 | 1322 | ||
295 | static struct p9_trans *p9_trans_create_tcp(const char *addr, char *args) | 1323 | static struct p9_trans * |
1324 | p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) | ||
296 | { | 1325 | { |
297 | int err; | 1326 | int err; |
298 | struct p9_trans *trans; | 1327 | struct p9_trans *trans; |
299 | struct socket *csocket; | 1328 | struct socket *csocket; |
300 | struct sockaddr_in sin_server; | 1329 | struct sockaddr_in sin_server; |
301 | struct p9_fd_opts opts; | 1330 | struct p9_fd_opts opts; |
1331 | struct p9_trans_fd *p; | ||
302 | 1332 | ||
303 | parse_opts(args, &opts); | 1333 | parse_opts(args, &opts); |
304 | 1334 | ||
@@ -306,11 +1336,10 @@ static struct p9_trans *p9_trans_create_tcp(const char *addr, char *args) | |||
306 | trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); | 1336 | trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); |
307 | if (!trans) | 1337 | if (!trans) |
308 | return ERR_PTR(-ENOMEM); | 1338 | return ERR_PTR(-ENOMEM); |
309 | 1339 | trans->msize = msize; | |
310 | trans->write = p9_fd_write; | 1340 | trans->extended = dotu; |
311 | trans->read = p9_fd_read; | 1341 | trans->rpc = p9_fd_rpc; |
312 | trans->close = p9_fd_close; | 1342 | trans->close = p9_fd_close; |
313 | trans->poll = p9_fd_poll; | ||
314 | 1343 | ||
315 | sin_server.sin_family = AF_INET; | 1344 | sin_server.sin_family = AF_INET; |
316 | sin_server.sin_addr.s_addr = in_aton(addr); | 1345 | sin_server.sin_addr.s_addr = in_aton(addr); |
@@ -337,6 +1366,14 @@ static struct p9_trans *p9_trans_create_tcp(const char *addr, char *args) | |||
337 | if (err < 0) | 1366 | if (err < 0) |
338 | goto error; | 1367 | goto error; |
339 | 1368 | ||
1369 | p = (struct p9_trans_fd *) trans->priv; | ||
1370 | p->conn = p9_conn_create(trans); | ||
1371 | if (IS_ERR(p->conn)) { | ||
1372 | err = PTR_ERR(p->conn); | ||
1373 | p->conn = NULL; | ||
1374 | goto error; | ||
1375 | } | ||
1376 | |||
340 | return trans; | 1377 | return trans; |
341 | 1378 | ||
342 | error: | 1379 | error: |
@@ -347,22 +1384,23 @@ error: | |||
347 | return ERR_PTR(err); | 1384 | return ERR_PTR(err); |
348 | } | 1385 | } |
349 | 1386 | ||
350 | static struct p9_trans *p9_trans_create_unix(const char *addr, char *args) | 1387 | static struct p9_trans * |
1388 | p9_trans_create_unix(const char *addr, char *args, int msize, | ||
1389 | unsigned char dotu) | ||
351 | { | 1390 | { |
352 | int err; | 1391 | int err; |
353 | struct socket *csocket; | 1392 | struct socket *csocket; |
354 | struct sockaddr_un sun_server; | 1393 | struct sockaddr_un sun_server; |
355 | struct p9_trans *trans; | 1394 | struct p9_trans *trans; |
1395 | struct p9_trans_fd *p; | ||
356 | 1396 | ||
357 | csocket = NULL; | 1397 | csocket = NULL; |
358 | trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); | 1398 | trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); |
359 | if (!trans) | 1399 | if (!trans) |
360 | return ERR_PTR(-ENOMEM); | 1400 | return ERR_PTR(-ENOMEM); |
361 | 1401 | ||
362 | trans->write = p9_fd_write; | 1402 | trans->rpc = p9_fd_rpc; |
363 | trans->read = p9_fd_read; | ||
364 | trans->close = p9_fd_close; | 1403 | trans->close = p9_fd_close; |
365 | trans->poll = p9_fd_poll; | ||
366 | 1404 | ||
367 | if (strlen(addr) > UNIX_PATH_MAX) { | 1405 | if (strlen(addr) > UNIX_PATH_MAX) { |
368 | P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", | 1406 | P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", |
@@ -387,6 +1425,16 @@ static struct p9_trans *p9_trans_create_unix(const char *addr, char *args) | |||
387 | if (err < 0) | 1425 | if (err < 0) |
388 | goto error; | 1426 | goto error; |
389 | 1427 | ||
1428 | trans->msize = msize; | ||
1429 | trans->extended = dotu; | ||
1430 | p = (struct p9_trans_fd *) trans->priv; | ||
1431 | p->conn = p9_conn_create(trans); | ||
1432 | if (IS_ERR(p->conn)) { | ||
1433 | err = PTR_ERR(p->conn); | ||
1434 | p->conn = NULL; | ||
1435 | goto error; | ||
1436 | } | ||
1437 | |||
390 | return trans; | 1438 | return trans; |
391 | 1439 | ||
392 | error: | 1440 | error: |
@@ -397,11 +1445,14 @@ error: | |||
397 | return ERR_PTR(err); | 1445 | return ERR_PTR(err); |
398 | } | 1446 | } |
399 | 1447 | ||
400 | static struct p9_trans *p9_trans_create_fd(const char *name, char *args) | 1448 | static struct p9_trans * |
1449 | p9_trans_create_fd(const char *name, char *args, int msize, | ||
1450 | unsigned char extended) | ||
401 | { | 1451 | { |
402 | int err; | 1452 | int err; |
403 | struct p9_trans *trans; | 1453 | struct p9_trans *trans; |
404 | struct p9_fd_opts opts; | 1454 | struct p9_fd_opts opts; |
1455 | struct p9_trans_fd *p; | ||
405 | 1456 | ||
406 | parse_opts(args, &opts); | 1457 | parse_opts(args, &opts); |
407 | 1458 | ||
@@ -414,15 +1465,23 @@ static struct p9_trans *p9_trans_create_fd(const char *name, char *args) | |||
414 | if (!trans) | 1465 | if (!trans) |
415 | return ERR_PTR(-ENOMEM); | 1466 | return ERR_PTR(-ENOMEM); |
416 | 1467 | ||
417 | trans->write = p9_fd_write; | 1468 | trans->rpc = p9_fd_rpc; |
418 | trans->read = p9_fd_read; | ||
419 | trans->close = p9_fd_close; | 1469 | trans->close = p9_fd_close; |
420 | trans->poll = p9_fd_poll; | ||
421 | 1470 | ||
422 | err = p9_fd_open(trans, opts.rfd, opts.wfd); | 1471 | err = p9_fd_open(trans, opts.rfd, opts.wfd); |
423 | if (err < 0) | 1472 | if (err < 0) |
424 | goto error; | 1473 | goto error; |
425 | 1474 | ||
1475 | trans->msize = msize; | ||
1476 | trans->extended = extended; | ||
1477 | p = (struct p9_trans_fd *) trans->priv; | ||
1478 | p->conn = p9_conn_create(trans); | ||
1479 | if (IS_ERR(p->conn)) { | ||
1480 | err = PTR_ERR(p->conn); | ||
1481 | p->conn = NULL; | ||
1482 | goto error; | ||
1483 | } | ||
1484 | |||
426 | return trans; | 1485 | return trans; |
427 | 1486 | ||
428 | error: | 1487 | error: |
@@ -453,6 +1512,12 @@ static struct p9_trans_module p9_fd_trans = { | |||
453 | 1512 | ||
454 | static int __init p9_trans_fd_init(void) | 1513 | static int __init p9_trans_fd_init(void) |
455 | { | 1514 | { |
1515 | int ret = p9_mux_global_init(); | ||
1516 | if (ret) { | ||
1517 | printk(KERN_WARNING "9p: starting mux failed\n"); | ||
1518 | return ret; | ||
1519 | } | ||
1520 | |||
456 | v9fs_register_trans(&p9_tcp_trans); | 1521 | v9fs_register_trans(&p9_tcp_trans); |
457 | v9fs_register_trans(&p9_unix_trans); | 1522 | v9fs_register_trans(&p9_unix_trans); |
458 | v9fs_register_trans(&p9_fd_trans); | 1523 | v9fs_register_trans(&p9_fd_trans); |
@@ -460,13 +1525,7 @@ static int __init p9_trans_fd_init(void) | |||
460 | return 1; | 1525 | return 1; |
461 | } | 1526 | } |
462 | 1527 | ||
463 | static void __exit p9_trans_fd_exit(void) { | ||
464 | printk(KERN_ERR "Removal of 9p transports not implemented\n"); | ||
465 | BUG(); | ||
466 | } | ||
467 | |||
468 | module_init(p9_trans_fd_init); | 1528 | module_init(p9_trans_fd_init); |
469 | module_exit(p9_trans_fd_exit); | ||
470 | 1529 | ||
471 | MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); | 1530 | MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); |
472 | MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); | 1531 | MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); |
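
After this change a transport module no longer exposes read/write/poll; its create() receives the message size and the dotu flag, and the returned transport carries an rpc op that performs a whole request/response exchange. A short sketch of the resulting shape (the example_* names are placeholders, not part of the patch):

	/* Illustrative: what a transport provides after this change. */
	static int example_rpc(struct p9_trans *t, struct p9_fcall *tc,
			       struct p9_fcall **rc);
	static void example_close(struct p9_trans *t);

	static struct p9_trans *
	example_create(const char *addr, char *args, int msize, unsigned char dotu)
	{
		struct p9_trans *t = kmalloc(sizeof(*t), GFP_KERNEL);

		if (!t)
			return ERR_PTR(-ENOMEM);
		t->msize = msize;
		t->extended = dotu;
		t->rpc = example_rpc;		/* one call per request/response */
		t->close = example_close;
		t->priv = NULL;
		return t;
	}

	static struct p9_trans_module example_trans = {
		.name = "example",
		.maxsize = PAGE_SIZE,
		.def = 0,
		.create = example_create,
	};
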
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 40b71a29fc3f..0117b9fb8480 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c | |||
@@ -1,17 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * The Guest 9p transport driver | 2 | * The Guest 9p transport driver |
3 | * | 3 | * |
4 | * This is a trivial pipe-based transport driver based on the lguest console | 4 | * This is a block-based transport driver based on the lguest block driver |
5 | * code: we use lguest's DMA mechanism to send bytes out, and register a | 5 | * code. |
6 | * DMA buffer to receive bytes in. It is assumed to be present and available | ||
7 | * from the very beginning of boot. | ||
8 | * | ||
9 | * This might have been done by just instantiating another HVC console, | ||
10 | * but HVC's blocksize of 16 bytes is annoying and painful to performance. | ||
11 | * | ||
12 | * A more efficient transport could be built based on the virtio block driver | ||
13 | * but it requires some changes in the 9p transport model (which are in | ||
14 | * progress) | ||
15 | * | 6 | * |
16 | */ | 7 | */ |
17 | /* | 8 | /* |
@@ -55,11 +46,25 @@ | |||
55 | #include <linux/virtio.h> | 46 | #include <linux/virtio.h> |
56 | #include <linux/virtio_9p.h> | 47 | #include <linux/virtio_9p.h> |
57 | 48 | ||
49 | #define VIRTQUEUE_NUM 128 | ||
50 | |||
58 | /* a single mutex to manage channel initialization and attachment */ | 51 | /* a single mutex to manage channel initialization and attachment */ |
59 | static DECLARE_MUTEX(virtio_9p_lock); | 52 | static DECLARE_MUTEX(virtio_9p_lock); |
60 | /* global which tracks highest initialized channel */ | 53 | /* global which tracks highest initialized channel */ |
61 | static int chan_index; | 54 | static int chan_index; |
62 | 55 | ||
56 | #define P9_INIT_MAXTAG 16 | ||
57 | |||
58 | #define REQ_STATUS_IDLE 0 | ||
59 | #define REQ_STATUS_SENT 1 | ||
60 | #define REQ_STATUS_RCVD 2 | ||
61 | #define REQ_STATUS_FLSH 3 | ||
62 | |||
63 | struct p9_req_t { | ||
64 | int status; | ||
65 | wait_queue_head_t *wq; | ||
66 | }; | ||
67 | |||
63 | /* We keep all per-channel information in a structure. | 68 | /* We keep all per-channel information in a structure. |
64 | * This structure is allocated within the devices dev->mem space. | 69 | * This structure is allocated within the devices dev->mem space. |
65 | * A pointer to the structure will get put in the transport private. | 70 | * A pointer to the structure will get put in the transport private. |
@@ -68,148 +73,198 @@ static struct virtio_chan { | |||
68 | bool initialized; /* channel is initialized */ | 73 | bool initialized; /* channel is initialized */ |
69 | bool inuse; /* channel is in use */ | 74 | bool inuse; /* channel is in use */ |
70 | 75 | ||
71 | struct virtqueue *in_vq, *out_vq; | 76 | spinlock_t lock; |
77 | |||
72 | struct virtio_device *vdev; | 78 | struct virtio_device *vdev; |
79 | struct virtqueue *vq; | ||
73 | 80 | ||
74 | /* This is our input buffer, and how much data is left in it. */ | 81 | struct p9_idpool *tagpool; |
75 | unsigned int in_len; | 82 | struct p9_req_t *reqs; |
76 | char *in, *inbuf; | 83 | int max_tag; |
77 | 84 | ||
78 | wait_queue_head_t wq; /* waitq for buffer */ | 85 | /* Scatterlist: can be too big for stack. */ |
86 | struct scatterlist sg[VIRTQUEUE_NUM]; | ||
79 | } channels[MAX_9P_CHAN]; | 87 | } channels[MAX_9P_CHAN]; |
80 | 88 | ||
89 | /* Lookup requests by tag */ | ||
90 | static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag) | ||
91 | { | ||
92 | /* This looks up the original request by tag so we know which | ||
93 | * buffer to read the data into */ | ||
94 | tag++; | ||
95 | |||
96 | while (tag >= c->max_tag) { | ||
97 | int old_max = c->max_tag; | ||
98 | int count; | ||
99 | |||
100 | if (c->max_tag) | ||
101 | c->max_tag *= 2; | ||
102 | else | ||
103 | c->max_tag = P9_INIT_MAXTAG; | ||
104 | |||
105 | c->reqs = krealloc(c->reqs, sizeof(struct p9_req_t)*c->max_tag, | ||
106 | GFP_ATOMIC); | ||
107 | if (!c->reqs) { | ||
108 | printk(KERN_ERR "Couldn't grow tag array\n"); | ||
109 | BUG(); | ||
110 | } | ||
111 | for (count = old_max; count < c->max_tag; count++) { | ||
112 | c->reqs[count].status = REQ_STATUS_IDLE; | ||
113 | c->reqs[count].wq = kmalloc(sizeof(wait_queue_t), | ||
114 | GFP_ATOMIC); | ||
115 | if (!c->reqs[count].wq) { | ||
116 | printk(KERN_ERR "Couldn't grow tag array\n"); | ||
117 | BUG(); | ||
118 | } | ||
119 | init_waitqueue_head(c->reqs[count].wq); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | return &c->reqs[tag]; | ||
124 | } | ||
125 | |||
126 | |||
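
p9_lookup_tag grows the per-channel request table on demand: tags index directly into a krealloc'd array (shifted by one so P9_NOTAG lands in slot 0), and the table doubles whenever an out-of-range tag arrives. A userspace-flavoured sketch of the same doubling strategy (illustrative only):

	#include <stdlib.h>

	/* Illustrative: grow a tag-indexed table by doubling until `tag` fits. */
	static int ensure_tag_slot(void ***table, size_t *cap, size_t tag)
	{
		size_t newcap = *cap ? *cap : 16;	/* mirrors P9_INIT_MAXTAG */

		while (tag >= newcap)
			newcap *= 2;
		if (newcap != *cap) {
			void **t = realloc(*table, newcap * sizeof(*t));
			if (!t)
				return -1;
			for (size_t i = *cap; i < newcap; i++)
				t[i] = NULL;		/* new slots start idle */
			*table = t;
			*cap = newcap;
		}
		return 0;
	}
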
81 | /* How many bytes left in this page. */ | 127 | /* How many bytes left in this page. */ |
82 | static unsigned int rest_of_page(void *data) | 128 | static unsigned int rest_of_page(void *data) |
83 | { | 129 | { |
84 | return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); | 130 | return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); |
85 | } | 131 | } |
86 | 132 | ||
87 | static int p9_virtio_write(struct p9_trans *trans, void *buf, int count) | 133 | static void p9_virtio_close(struct p9_trans *trans) |
88 | { | 134 | { |
89 | struct virtio_chan *chan = (struct virtio_chan *) trans->priv; | 135 | struct virtio_chan *chan = trans->priv; |
90 | struct virtqueue *out_vq = chan->out_vq; | 136 | int count; |
91 | struct scatterlist sg[1]; | 137 | unsigned long flags; |
92 | unsigned int len; | ||
93 | 138 | ||
94 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio write (%d)\n", count); | 139 | spin_lock_irqsave(&chan->lock, flags); |
140 | p9_idpool_destroy(chan->tagpool); | ||
141 | for (count = 0; count < chan->max_tag; count++) | ||
142 | kfree(chan->reqs[count].wq); | ||
143 | kfree(chan->reqs); | ||
144 | chan->max_tag = 0; | ||
145 | spin_unlock_irqrestore(&chan->lock, flags); | ||
95 | 146 | ||
96 | /* keep it simple - make sure we don't overflow a page */ | 147 | down(&virtio_9p_lock); |
97 | if (rest_of_page(buf) < count) | 148 | chan->inuse = false; |
98 | count = rest_of_page(buf); | 149 | up(&virtio_9p_lock); |
99 | 150 | ||
100 | sg_init_one(sg, buf, count); | 151 | kfree(trans); |
152 | } | ||
101 | 153 | ||
102 | /* add_buf wants a token to identify this buffer: we hand it any | 154 | static void req_done(struct virtqueue *vq) |
103 | * non-NULL pointer, since there's only ever one buffer. */ | 155 | { |
104 | if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) { | 156 | struct virtio_chan *chan = vq->vdev->priv; |
105 | /* Tell Host to go! */ | 157 | struct p9_fcall *rc; |
106 | out_vq->vq_ops->kick(out_vq); | 158 | unsigned int len; |
107 | /* Chill out until it's done with the buffer. */ | 159 | unsigned long flags; |
108 | while (!out_vq->vq_ops->get_buf(out_vq, &len)) | 160 | struct p9_req_t *req; |
109 | cpu_relax(); | 161 | |
162 | spin_lock_irqsave(&chan->lock, flags); | ||
163 | while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) { | ||
164 | req = p9_lookup_tag(chan, rc->tag); | ||
165 | req->status = REQ_STATUS_RCVD; | ||
166 | wake_up(req->wq); | ||
110 | } | 167 | } |
111 | 168 | /* In case queue is stopped waiting for more buffers. */ | |
112 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio wrote (%d)\n", count); | 169 | spin_unlock_irqrestore(&chan->lock, flags); |
113 | |||
114 | /* We're expected to return the amount of data we wrote: all of it. */ | ||
115 | return count; | ||
116 | } | 170 | } |
117 | 171 | ||
118 | /* Create a scatter-gather list representing our input buffer and put it in the | 172 | static int |
119 | * queue. */ | 173 | pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, |
120 | static void add_inbuf(struct virtio_chan *chan) | 174 | int count) |
121 | { | 175 | { |
122 | struct scatterlist sg[1]; | 176 | int s; |
123 | 177 | int index = start; | |
124 | sg_init_one(sg, chan->inbuf, PAGE_SIZE); | 178 | |
179 | while (count) { | ||
180 | s = rest_of_page(data); | ||
181 | if (s > count) | ||
182 | s = count; | ||
183 | sg_set_buf(&sg[index++], data, s); | ||
184 | count -= s; | ||
185 | data += s; | ||
186 | if (index > limit) | ||
187 | BUG(); | ||
188 | } | ||
125 | 189 | ||
126 | /* We should always be able to add one buffer to an empty queue. */ | 190 | return index-start; |
127 | if (chan->in_vq->vq_ops->add_buf(chan->in_vq, sg, 0, 1, chan->inbuf)) | ||
128 | BUG(); | ||
129 | chan->in_vq->vq_ops->kick(chan->in_vq); | ||
130 | } | 191 | } |
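
pack_sg_list chops a linear buffer into page-bounded scatterlist entries so each piece can be mapped independently; rest_of_page() caps the first chunk at the page boundary and every later chunk is at most one page. A worked example of the split (illustrative, assuming PAGE_SIZE of 4096):

	/* Illustrative: count how many sg entries pack_sg_list() would use for
	 * a buffer of `count` bytes starting `off` bytes into a page. */
	static int sg_entries_needed(unsigned long off, int count, int page_size)
	{
		int first = page_size - (int)(off % page_size);
		int n;

		if (count <= 0)
			return 0;
		if (first > count)
			first = count;
		count -= first;
		n = 1 + count / page_size + (count % page_size ? 1 : 0);
		return n;	/* e.g. off = 256, count = 9000: 3840 + 4096 + 1064, 3 entries */
	}
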
131 | 192 | ||
132 | static int p9_virtio_read(struct p9_trans *trans, void *buf, int count) | 193 | static int |
194 | p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) | ||
133 | { | 195 | { |
134 | struct virtio_chan *chan = (struct virtio_chan *) trans->priv; | 196 | int in, out; |
135 | struct virtqueue *in_vq = chan->in_vq; | 197 | int n, err, size; |
136 | 198 | struct virtio_chan *chan = t->priv; | |
137 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio read (%d)\n", count); | 199 | char *rdata; |
200 | struct p9_req_t *req; | ||
201 | unsigned long flags; | ||
202 | |||
203 | if (*rc == NULL) { | ||
204 | *rc = kmalloc(sizeof(struct p9_fcall) + t->msize, GFP_KERNEL); | ||
205 | if (!*rc) | ||
206 | return -ENOMEM; | ||
207 | } | ||
138 | 208 | ||
139 | /* If we don't have an input queue yet, we can't get input. */ | 209 | rdata = (char *)*rc+sizeof(struct p9_fcall); |
140 | BUG_ON(!in_vq); | ||
141 | 210 | ||
142 | /* No buffer? Try to get one. */ | 211 | n = P9_NOTAG; |
143 | if (!chan->in_len) { | 212 | if (tc->id != P9_TVERSION) { |
144 | chan->in = in_vq->vq_ops->get_buf(in_vq, &chan->in_len); | 213 | n = p9_idpool_get(chan->tagpool); |
145 | if (!chan->in) | 214 | if (n < 0) |
146 | return 0; | 215 | return -ENOMEM; |
147 | } | 216 | } |
148 | 217 | ||
149 | /* You want more than we have to give? Well, try wanting less! */ | 218 | spin_lock_irqsave(&chan->lock, flags); |
150 | if (chan->in_len < count) | 219 | req = p9_lookup_tag(chan, n); |
151 | count = chan->in_len; | 220 | spin_unlock_irqrestore(&chan->lock, flags); |
152 | 221 | ||
153 | /* Copy across to their buffer and increment offset. */ | 222 | p9_set_tag(tc, n); |
154 | memcpy(buf, chan->in, count); | ||
155 | chan->in += count; | ||
156 | chan->in_len -= count; | ||
157 | 223 | ||
158 | /* Finished? Re-register buffer so Host will use it again. */ | 224 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc tag %d\n", n); |
159 | if (chan->in_len == 0) | ||
160 | add_inbuf(chan); | ||
161 | 225 | ||
162 | P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio finished read (%d)\n", | 226 | out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size); |
163 | count); | 227 | in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, t->msize); |
164 | |||
165 | return count; | ||
166 | } | ||
167 | 228 | ||
168 | /* The poll function is used by 9p transports to determine if there | 229 | req->status = REQ_STATUS_SENT; |
169 | * is activity available on a particular channel. In our case | ||
170 | * we use it to wait for a callback from the input routines. | ||
171 | */ | ||
172 | static unsigned int | ||
173 | p9_virtio_poll(struct p9_trans *trans, struct poll_table_struct *pt) | ||
174 | { | ||
175 | struct virtio_chan *chan = (struct virtio_chan *)trans->priv; | ||
176 | struct virtqueue *in_vq = chan->in_vq; | ||
177 | int ret = POLLOUT; /* we can always handle more output */ | ||
178 | 230 | ||
179 | poll_wait(NULL, &chan->wq, pt); | 231 | if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, tc)) { |
232 | P9_DPRINTK(P9_DEBUG_TRANS, | ||
233 | "9p debug: virtio rpc add_buf returned failure"); | ||
234 | return -EIO; | ||
235 | } | ||
180 | 236 | ||
181 | /* No buffer? Try to get one. */ | 237 | chan->vq->vq_ops->kick(chan->vq); |
182 | if (!chan->in_len) | ||
183 | chan->in = in_vq->vq_ops->get_buf(in_vq, &chan->in_len); | ||
184 | 238 | ||
185 | if (chan->in_len) | 239 | wait_event(*req->wq, req->status == REQ_STATUS_RCVD); |
186 | ret |= POLLIN; | ||
187 | 240 | ||
188 | return ret; | 241 | size = le32_to_cpu(*(__le32 *) rdata); |
189 | } | ||
190 | 242 | ||
191 | static void p9_virtio_close(struct p9_trans *trans) | 243 | err = p9_deserialize_fcall(rdata, size, *rc, t->extended); |
192 | { | 244 | if (err < 0) { |
193 | struct virtio_chan *chan = trans->priv; | 245 | P9_DPRINTK(P9_DEBUG_TRANS, |
246 | "9p debug: virtio rpc deserialize returned %d\n", err); | ||
247 | return err; | ||
248 | } | ||
194 | 249 | ||
195 | down(&virtio_9p_lock); | 250 | #ifdef CONFIG_NET_9P_DEBUG |
196 | chan->inuse = false; | 251 | if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { |
197 | up(&virtio_9p_lock); | 252 | char buf[150]; |
198 | 253 | ||
199 | kfree(trans); | 254 | p9_printfcall(buf, sizeof(buf), *rc, t->extended); |
200 | } | 255 | printk(KERN_NOTICE ">>> %p %s\n", t, buf); |
256 | } | ||
257 | #endif | ||
201 | 258 | ||
202 | static bool p9_virtio_intr(struct virtqueue *q) | 259 | if (n != P9_NOTAG && p9_idpool_check(n, chan->tagpool)) |
203 | { | 260 | p9_idpool_put(n, chan->tagpool); |
204 | struct virtio_chan *chan = q->vdev->priv; | ||
205 | 261 | ||
206 | P9_DPRINTK(P9_DEBUG_TRANS, "9p poll_wakeup: %p\n", &chan->wq); | 262 | req->status = REQ_STATUS_IDLE; |
207 | wake_up_interruptible(&chan->wq); | ||
208 | 263 | ||
209 | return true; | 264 | return 0; |
210 | } | 265 | } |
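
The virtio rpc path is fully synchronous: the request and the response buffer are posted together as out/in scatterlist segments, the queue is kicked, and the caller sleeps on the per-tag wait queue until req_done() marks the slot REQ_STATUS_RCVD. A condensed sketch of that ordering (illustrative; the example_* name is not part of the patch):

	/* Illustrative: the send/wait ordering used by p9_virtio_rpc(). */
	static int example_virtio_exchange(struct virtio_chan *chan,
					   struct p9_fcall *tc, char *rdata,
					   int rlen, struct p9_req_t *req)
	{
		int out, in;

		out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size);
		in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM - out, rdata, rlen);

		req->status = REQ_STATUS_SENT;
		if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, tc))
			return -EIO;			/* ring full or broken */

		chan->vq->vq_ops->kick(chan->vq);	/* notify the host */
		wait_event(*req->wq, req->status == REQ_STATUS_RCVD);
		return 0;				/* reply now sits in rdata */
	}
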
211 | 266 | ||
212 | static int p9_virtio_probe(struct virtio_device *dev) | 267 | static int p9_virtio_probe(struct virtio_device *vdev) |
213 | { | 268 | { |
214 | int err; | 269 | int err; |
215 | struct virtio_chan *chan; | 270 | struct virtio_chan *chan; |
@@ -223,44 +278,29 @@ static int p9_virtio_probe(struct virtio_device *dev) | |||
223 | if (chan_index > MAX_9P_CHAN) { | 278 | if (chan_index > MAX_9P_CHAN) { |
224 | printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); | 279 | printk(KERN_ERR "9p: virtio: Maximum channels exceeded\n"); |
225 | BUG(); | 280 | BUG(); |
226 | } | ||
227 | |||
228 | chan->vdev = dev; | ||
229 | |||
230 | /* This is the scratch page we use to receive console input */ | ||
231 | chan->inbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); | ||
232 | if (!chan->inbuf) { | ||
233 | err = -ENOMEM; | 281 | err = -ENOMEM; |
234 | goto fail; | 282 | goto fail; |
235 | } | 283 | } |
236 | 284 | ||
237 | /* Find the input queue. */ | 285 | chan->vdev = vdev; |
238 | dev->priv = chan; | ||
239 | chan->in_vq = dev->config->find_vq(dev, p9_virtio_intr); | ||
240 | if (IS_ERR(chan->in_vq)) { | ||
241 | err = PTR_ERR(chan->in_vq); | ||
242 | goto free; | ||
243 | } | ||
244 | 286 | ||
245 | chan->out_vq = dev->config->find_vq(dev, NULL); | 287 | /* We expect one virtqueue, for requests. */ |
246 | if (IS_ERR(chan->out_vq)) { | 288 | chan->vq = vdev->config->find_vq(vdev, 0, req_done); |
247 | err = PTR_ERR(chan->out_vq); | 289 | if (IS_ERR(chan->vq)) { |
248 | goto free_in_vq; | 290 | err = PTR_ERR(chan->vq); |
291 | goto out_free_vq; | ||
249 | } | 292 | } |
293 | chan->vq->vdev->priv = chan; | ||
294 | spin_lock_init(&chan->lock); | ||
250 | 295 | ||
251 | init_waitqueue_head(&chan->wq); | 296 | sg_init_table(chan->sg, VIRTQUEUE_NUM); |
252 | 297 | ||
253 | /* Register the input buffer the first time. */ | ||
254 | add_inbuf(chan); | ||
255 | chan->inuse = false; | 298 | chan->inuse = false; |
256 | chan->initialized = true; | 299 | chan->initialized = true; |
257 | |||
258 | return 0; | 300 | return 0; |
259 | 301 | ||
260 | free_in_vq: | 302 | out_free_vq: |
261 | dev->config->del_vq(chan->in_vq); | 303 | vdev->config->del_vq(chan->vq); |
262 | free: | ||
263 | kfree(chan->inbuf); | ||
264 | fail: | 304 | fail: |
265 | down(&virtio_9p_lock); | 305 | down(&virtio_9p_lock); |
266 | chan_index--; | 306 | chan_index--; |
@@ -273,11 +313,13 @@ fail: | |||
273 | * alternate channels by matching devname versus a virtio_config entry. | 313 | * alternate channels by matching devname versus a virtio_config entry. |
274 | * We use a simple reference count mechanism to ensure that only a single | 314 | * We use a simple reference count mechanism to ensure that only a single |
275 | * mount has a channel open at a time. */ | 315 | * mount has a channel open at a time. */ |
276 | static struct p9_trans *p9_virtio_create(const char *devname, char *args) | 316 | static struct p9_trans * |
317 | p9_virtio_create(const char *devname, char *args, int msize, | ||
318 | unsigned char extended) | ||
277 | { | 319 | { |
278 | struct p9_trans *trans; | 320 | struct p9_trans *trans; |
279 | int index = 0; | ||
280 | struct virtio_chan *chan = channels; | 321 | struct virtio_chan *chan = channels; |
322 | int index = 0; | ||
281 | 323 | ||
282 | down(&virtio_9p_lock); | 324 | down(&virtio_9p_lock); |
283 | while (index < MAX_9P_CHAN) { | 325 | while (index < MAX_9P_CHAN) { |
@@ -292,25 +334,45 @@ static struct p9_trans *p9_virtio_create(const char *devname, char *args) | |||
292 | up(&virtio_9p_lock); | 334 | up(&virtio_9p_lock); |
293 | 335 | ||
294 | if (index >= MAX_9P_CHAN) { | 336 | if (index >= MAX_9P_CHAN) { |
295 | printk(KERN_ERR "9p: virtio: couldn't find a free channel\n"); | 337 | printk(KERN_ERR "9p: no channels available\n"); |
296 | return NULL; | 338 | return ERR_PTR(-ENODEV); |
297 | } | 339 | } |
298 | 340 | ||
341 | chan->tagpool = p9_idpool_create(); | ||
342 | if (IS_ERR(chan->tagpool)) { | ||
343 | printk(KERN_ERR "9p: couldn't allocate tagpool\n"); | ||
344 | return ERR_PTR(-ENOMEM); | ||
345 | } | ||
346 | p9_idpool_get(chan->tagpool); /* reserve tag 0 */ | ||
347 | chan->max_tag = 0; | ||
348 | chan->reqs = NULL; | ||
349 | |||
299 | trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); | 350 | trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); |
300 | if (!trans) { | 351 | if (!trans) { |
301 | printk(KERN_ERR "9p: couldn't allocate transport\n"); | 352 | printk(KERN_ERR "9p: couldn't allocate transport\n"); |
302 | return ERR_PTR(-ENOMEM); | 353 | return ERR_PTR(-ENOMEM); |
303 | } | 354 | } |
304 | 355 | trans->extended = extended; | |
305 | trans->write = p9_virtio_write; | 356 | trans->msize = msize; |
306 | trans->read = p9_virtio_read; | ||
307 | trans->close = p9_virtio_close; | 357 | trans->close = p9_virtio_close; |
308 | trans->poll = p9_virtio_poll; | 358 | trans->rpc = p9_virtio_rpc; |
309 | trans->priv = chan; | 359 | trans->priv = chan; |
310 | 360 | ||
311 | return trans; | 361 | return trans; |
312 | } | 362 | } |
313 | 363 | ||
364 | static void p9_virtio_remove(struct virtio_device *vdev) | ||
365 | { | ||
366 | struct virtio_chan *chan = vdev->priv; | ||
367 | |||
368 | BUG_ON(chan->inuse); | ||
369 | |||
370 | if (chan->initialized) { | ||
371 | vdev->config->del_vq(chan->vq); | ||
372 | chan->initialized = false; | ||
373 | } | ||
374 | } | ||
375 | |||
314 | #define VIRTIO_ID_9P 9 | 376 | #define VIRTIO_ID_9P 9 |
315 | 377 | ||
316 | static struct virtio_device_id id_table[] = { | 378 | static struct virtio_device_id id_table[] = { |
@@ -324,12 +386,13 @@ static struct virtio_driver p9_virtio_drv = { | |||
324 | .driver.owner = THIS_MODULE, | 386 | .driver.owner = THIS_MODULE, |
325 | .id_table = id_table, | 387 | .id_table = id_table, |
326 | .probe = p9_virtio_probe, | 388 | .probe = p9_virtio_probe, |
389 | .remove = p9_virtio_remove, | ||
327 | }; | 390 | }; |
328 | 391 | ||
329 | static struct p9_trans_module p9_virtio_trans = { | 392 | static struct p9_trans_module p9_virtio_trans = { |
330 | .name = "virtio", | 393 | .name = "virtio", |
331 | .create = p9_virtio_create, | 394 | .create = p9_virtio_create, |
332 | .maxsize = PAGE_SIZE, | 395 | .maxsize = PAGE_SIZE*16, |
333 | .def = 0, | 396 | .def = 0, |
334 | }; | 397 | }; |
335 | 398 | ||
@@ -345,7 +408,13 @@ static int __init p9_virtio_init(void) | |||
345 | return register_virtio_driver(&p9_virtio_drv); | 408 | return register_virtio_driver(&p9_virtio_drv); |
346 | } | 409 | } |
347 | 410 | ||
411 | static void __exit p9_virtio_cleanup(void) | ||
412 | { | ||
413 | unregister_virtio_driver(&p9_virtio_drv); | ||
414 | } | ||
415 | |||
348 | module_init(p9_virtio_init); | 416 | module_init(p9_virtio_init); |
417 | module_exit(p9_virtio_cleanup); | ||
349 | 418 | ||
350 | MODULE_DEVICE_TABLE(virtio, id_table); | 419 | MODULE_DEVICE_TABLE(virtio, id_table); |
351 | MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); | 420 | MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); |
diff --git a/net/9p/util.c b/net/9p/util.c index 22077b79395d..ef7215565d88 100644 --- a/net/9p/util.c +++ b/net/9p/util.c | |||
@@ -33,7 +33,7 @@ | |||
33 | #include <net/9p/9p.h> | 33 | #include <net/9p/9p.h> |
34 | 34 | ||
35 | struct p9_idpool { | 35 | struct p9_idpool { |
36 | struct semaphore lock; | 36 | spinlock_t lock; |
37 | struct idr pool; | 37 | struct idr pool; |
38 | }; | 38 | }; |
39 | 39 | ||
@@ -45,7 +45,7 @@ struct p9_idpool *p9_idpool_create(void) | |||
45 | if (!p) | 45 | if (!p) |
46 | return ERR_PTR(-ENOMEM); | 46 | return ERR_PTR(-ENOMEM); |
47 | 47 | ||
48 | init_MUTEX(&p->lock); | 48 | spin_lock_init(&p->lock); |
49 | idr_init(&p->pool); | 49 | idr_init(&p->pool); |
50 | 50 | ||
51 | return p; | 51 | return p; |
@@ -71,19 +71,17 @@ int p9_idpool_get(struct p9_idpool *p) | |||
71 | { | 71 | { |
72 | int i = 0; | 72 | int i = 0; |
73 | int error; | 73 | int error; |
74 | unsigned int flags; | ||
74 | 75 | ||
75 | retry: | 76 | retry: |
76 | if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) | 77 | if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) |
77 | return 0; | 78 | return 0; |
78 | 79 | ||
79 | if (down_interruptible(&p->lock) == -EINTR) { | 80 | spin_lock_irqsave(&p->lock, flags); |
80 | P9_EPRINTK(KERN_WARNING, "Interrupted while locking\n"); | ||
81 | return -1; | ||
82 | } | ||
83 | 81 | ||
84 | /* no need to store exactly p, we just need something non-null */ | 82 | /* no need to store exactly p, we just need something non-null */ |
85 | error = idr_get_new(&p->pool, p, &i); | 83 | error = idr_get_new(&p->pool, p, &i); |
86 | up(&p->lock); | 84 | spin_unlock_irqrestore(&p->lock, flags); |
87 | 85 | ||
88 | if (error == -EAGAIN) | 86 | if (error == -EAGAIN) |
89 | goto retry; | 87 | goto retry; |
@@ -104,12 +102,10 @@ EXPORT_SYMBOL(p9_idpool_get); | |||
104 | 102 | ||
105 | void p9_idpool_put(int id, struct p9_idpool *p) | 103 | void p9_idpool_put(int id, struct p9_idpool *p) |
106 | { | 104 | { |
107 | if (down_interruptible(&p->lock) == -EINTR) { | 105 | unsigned int flags; |
108 | P9_EPRINTK(KERN_WARNING, "Interrupted while locking\n"); | 106 | spin_lock_irqsave(&p->lock, flags); |
109 | return; | ||
110 | } | ||
111 | idr_remove(&p->pool, id); | 107 | idr_remove(&p->pool, id); |
112 | up(&p->lock); | 108 | spin_unlock_irqrestore(&p->lock, flags); |
113 | } | 109 | } |
114 | EXPORT_SYMBOL(p9_idpool_put); | 110 | EXPORT_SYMBOL(p9_idpool_put); |
115 | 111 | ||
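The util.c hunk above swaps the idpool's interruptible semaphore for an irq-safe spinlock, so p9_idpool_get()/p9_idpool_put() no longer have an "interrupted while locking" failure path. A minimal usage sketch follows, assuming only the p9_idpool API from net/9p/util.c (p9_idpool_destroy() is not visible in the hunk but is assumed to exist alongside the other helpers):

	/* Hedged sketch, not part of the patch: allocate a tag from a 9P id
	 * pool and release it again.  Assumes the p9_idpool API from
	 * net/9p/util.c; the error convention matches the callers above. */
	#include <linux/err.h>
	#include <net/9p/9p.h>

	static int example_reserve_tag(void)
	{
		struct p9_idpool *pool;
		int tag;

		pool = p9_idpool_create();
		if (IS_ERR(pool))
			return PTR_ERR(pool);

		tag = p9_idpool_get(pool);	/* allocated id, negative on failure */
		if (tag < 0) {
			p9_idpool_destroy(pool);
			return tag;
		}

		/* ... tag identifies an in-flight request ... */

		p9_idpool_put(tag, pool);
		p9_idpool_destroy(pool);
		return 0;
	}
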
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 782a22602b86..519cdb920f93 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c | |||
@@ -135,8 +135,8 @@ static void __hidp_copy_session(struct hidp_session *session, struct hidp_connin | |||
135 | } | 135 | } |
136 | } | 136 | } |
137 | 137 | ||
138 | static inline int hidp_queue_event(struct hidp_session *session, struct input_dev *dev, | 138 | static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev, |
139 | unsigned int type, unsigned int code, int value) | 139 | unsigned int type, unsigned int code, int value) |
140 | { | 140 | { |
141 | unsigned char newleds; | 141 | unsigned char newleds; |
142 | struct sk_buff *skb; | 142 | struct sk_buff *skb; |
@@ -243,7 +243,8 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) | |||
243 | input_sync(dev); | 243 | input_sync(dev); |
244 | } | 244 | } |
245 | 245 | ||
246 | static inline int hidp_queue_report(struct hidp_session *session, unsigned char *data, int size) | 246 | static int hidp_queue_report(struct hidp_session *session, |
247 | unsigned char *data, int size) | ||
247 | { | 248 | { |
248 | struct sk_buff *skb; | 249 | struct sk_buff *skb; |
249 | 250 | ||
@@ -287,7 +288,7 @@ static void hidp_idle_timeout(unsigned long arg) | |||
287 | hidp_schedule(session); | 288 | hidp_schedule(session); |
288 | } | 289 | } |
289 | 290 | ||
290 | static inline void hidp_set_timer(struct hidp_session *session) | 291 | static void hidp_set_timer(struct hidp_session *session) |
291 | { | 292 | { |
292 | if (session->idle_to > 0) | 293 | if (session->idle_to > 0) |
293 | mod_timer(&session->timer, jiffies + HZ * session->idle_to); | 294 | mod_timer(&session->timer, jiffies + HZ * session->idle_to); |
@@ -332,7 +333,8 @@ static inline int hidp_send_ctrl_message(struct hidp_session *session, | |||
332 | return err; | 333 | return err; |
333 | } | 334 | } |
334 | 335 | ||
335 | static inline void hidp_process_handshake(struct hidp_session *session, unsigned char param) | 336 | static void hidp_process_handshake(struct hidp_session *session, |
337 | unsigned char param) | ||
336 | { | 338 | { |
337 | BT_DBG("session %p param 0x%02x", session, param); | 339 | BT_DBG("session %p param 0x%02x", session, param); |
338 | 340 | ||
@@ -365,38 +367,23 @@ static inline void hidp_process_handshake(struct hidp_session *session, unsigned | |||
365 | } | 367 | } |
366 | } | 368 | } |
367 | 369 | ||
368 | static inline void hidp_process_hid_control(struct hidp_session *session, unsigned char param) | 370 | static void hidp_process_hid_control(struct hidp_session *session, |
371 | unsigned char param) | ||
369 | { | 372 | { |
370 | BT_DBG("session %p param 0x%02x", session, param); | 373 | BT_DBG("session %p param 0x%02x", session, param); |
371 | 374 | ||
372 | switch (param) { | 375 | if (param == HIDP_CTRL_VIRTUAL_CABLE_UNPLUG) { |
373 | case HIDP_CTRL_NOP: | ||
374 | break; | ||
375 | |||
376 | case HIDP_CTRL_VIRTUAL_CABLE_UNPLUG: | ||
377 | /* Flush the transmit queues */ | 376 | /* Flush the transmit queues */ |
378 | skb_queue_purge(&session->ctrl_transmit); | 377 | skb_queue_purge(&session->ctrl_transmit); |
379 | skb_queue_purge(&session->intr_transmit); | 378 | skb_queue_purge(&session->intr_transmit); |
380 | 379 | ||
381 | /* Kill session thread */ | 380 | /* Kill session thread */ |
382 | atomic_inc(&session->terminate); | 381 | atomic_inc(&session->terminate); |
383 | break; | ||
384 | |||
385 | case HIDP_CTRL_HARD_RESET: | ||
386 | case HIDP_CTRL_SOFT_RESET: | ||
387 | case HIDP_CTRL_SUSPEND: | ||
388 | case HIDP_CTRL_EXIT_SUSPEND: | ||
389 | /* FIXME: We have to parse these and return no error */ | ||
390 | break; | ||
391 | |||
392 | default: | ||
393 | __hidp_send_ctrl_message(session, | ||
394 | HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0); | ||
395 | break; | ||
396 | } | 382 | } |
397 | } | 383 | } |
398 | 384 | ||
399 | static inline void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, unsigned char param) | 385 | static void hidp_process_data(struct hidp_session *session, struct sk_buff *skb, |
386 | unsigned char param) | ||
400 | { | 387 | { |
401 | BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param); | 388 | BT_DBG("session %p skb %p len %d param 0x%02x", session, skb, skb->len, param); |
402 | 389 | ||
@@ -423,7 +410,8 @@ static inline void hidp_process_data(struct hidp_session *session, struct sk_buf | |||
423 | } | 410 | } |
424 | } | 411 | } |
425 | 412 | ||
426 | static inline void hidp_recv_ctrl_frame(struct hidp_session *session, struct sk_buff *skb) | 413 | static void hidp_recv_ctrl_frame(struct hidp_session *session, |
414 | struct sk_buff *skb) | ||
427 | { | 415 | { |
428 | unsigned char hdr, type, param; | 416 | unsigned char hdr, type, param; |
429 | 417 | ||
@@ -457,7 +445,8 @@ static inline void hidp_recv_ctrl_frame(struct hidp_session *session, struct sk_ | |||
457 | kfree_skb(skb); | 445 | kfree_skb(skb); |
458 | } | 446 | } |
459 | 447 | ||
460 | static inline void hidp_recv_intr_frame(struct hidp_session *session, struct sk_buff *skb) | 448 | static void hidp_recv_intr_frame(struct hidp_session *session, |
449 | struct sk_buff *skb) | ||
461 | { | 450 | { |
462 | unsigned char hdr; | 451 | unsigned char hdr; |
463 | 452 | ||
@@ -625,7 +614,8 @@ static struct device *hidp_get_device(struct hidp_session *session) | |||
625 | return conn ? &conn->dev : NULL; | 614 | return conn ? &conn->dev : NULL; |
626 | } | 615 | } |
627 | 616 | ||
628 | static inline int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) | 617 | static int hidp_setup_input(struct hidp_session *session, |
618 | struct hidp_connadd_req *req) | ||
629 | { | 619 | { |
630 | struct input_dev *input = session->input; | 620 | struct input_dev *input = session->input; |
631 | int i; | 621 | int i; |
@@ -702,7 +692,8 @@ static void hidp_setup_quirks(struct hid_device *hid) | |||
702 | hid->quirks = hidp_blacklist[n].quirks; | 692 | hid->quirks = hidp_blacklist[n].quirks; |
703 | } | 693 | } |
704 | 694 | ||
705 | static inline void hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req) | 695 | static void hidp_setup_hid(struct hidp_session *session, |
696 | struct hidp_connadd_req *req) | ||
706 | { | 697 | { |
707 | struct hid_device *hid = session->hid; | 698 | struct hid_device *hid = session->hid; |
708 | struct hid_report *report; | 699 | struct hid_report *report; |
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 788c70321858..e4c779bb8d76 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c | |||
@@ -429,7 +429,8 @@ static int rfcomm_release_dev(void __user *arg) | |||
429 | if (dev->tty) | 429 | if (dev->tty) |
430 | tty_vhangup(dev->tty); | 430 | tty_vhangup(dev->tty); |
431 | 431 | ||
432 | rfcomm_dev_del(dev); | 432 | if (!test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) |
433 | rfcomm_dev_del(dev); | ||
433 | rfcomm_dev_put(dev); | 434 | rfcomm_dev_put(dev); |
434 | return 0; | 435 | return 0; |
435 | } | 436 | } |
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 26e941d912e8..7b660834a4c2 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -287,7 +287,7 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys); | |||
287 | * @ops: pernet operations structure to manipulate | 287 | * @ops: pernet operations structure to manipulate |
288 | * | 288 | * |
289 | * Remove the pernet operations structure from the list to be | 289 | * Remove the pernet operations structure from the list to be |
290 | * used when network namespaces are created or destoryed. In | 290 | * used when network namespaces are created or destroyed. In |
291 | * addition run the exit method for all existing network | 291 | * addition run the exit method for all existing network |
292 | * namespaces. | 292 | * namespaces. |
293 | */ | 293 | */ |
@@ -335,7 +335,7 @@ EXPORT_SYMBOL_GPL(register_pernet_device); | |||
335 | * @ops: pernet operations structure to manipulate | 335 | * @ops: pernet operations structure to manipulate |
336 | * | 336 | * |
337 | * Remove the pernet operations structure from the list to be | 337 | * Remove the pernet operations structure from the list to be |
338 | * used when network namespaces are created or destoryed. In | 338 | * used when network namespaces are created or destroyed. In |
339 | * addition run the exit method for all existing network | 339 | * addition run the exit method for all existing network |
340 | * namespaces. | 340 | * namespaces. |
341 | */ | 341 | */ |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ddbdde82a700..61ac8d06292c 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -82,32 +82,6 @@ int rtnl_trylock(void) | |||
82 | return mutex_trylock(&rtnl_mutex); | 82 | return mutex_trylock(&rtnl_mutex); |
83 | } | 83 | } |
84 | 84 | ||
85 | int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) | ||
86 | { | ||
87 | memset(tb, 0, sizeof(struct rtattr*)*maxattr); | ||
88 | |||
89 | while (RTA_OK(rta, len)) { | ||
90 | unsigned flavor = rta->rta_type; | ||
91 | if (flavor && flavor <= maxattr) | ||
92 | tb[flavor-1] = rta; | ||
93 | rta = RTA_NEXT(rta, len); | ||
94 | } | ||
95 | return 0; | ||
96 | } | ||
97 | |||
98 | int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, | ||
99 | struct rtattr *rta, int len) | ||
100 | { | ||
101 | if (RTA_PAYLOAD(rta) < len) | ||
102 | return -1; | ||
103 | if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { | ||
104 | rta = RTA_DATA(rta) + RTA_ALIGN(len); | ||
105 | return rtattr_parse_nested(tb, maxattr, rta); | ||
106 | } | ||
107 | memset(tb, 0, sizeof(struct rtattr *) * maxattr); | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static struct rtnl_link *rtnl_msg_handlers[NPROTO]; | 85 | static struct rtnl_link *rtnl_msg_handlers[NPROTO]; |
112 | 86 | ||
113 | static inline int rtm_msgindex(int msgtype) | 87 | static inline int rtm_msgindex(int msgtype) |
@@ -442,21 +416,6 @@ void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data | |||
442 | memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); | 416 | memset(RTA_DATA(rta) + attrlen, 0, RTA_ALIGN(size) - size); |
443 | } | 417 | } |
444 | 418 | ||
445 | size_t rtattr_strlcpy(char *dest, const struct rtattr *rta, size_t size) | ||
446 | { | ||
447 | size_t ret = RTA_PAYLOAD(rta); | ||
448 | char *src = RTA_DATA(rta); | ||
449 | |||
450 | if (ret > 0 && src[ret - 1] == '\0') | ||
451 | ret--; | ||
452 | if (size > 0) { | ||
453 | size_t len = (ret >= size) ? size - 1 : ret; | ||
454 | memset(dest, 0, size); | ||
455 | memcpy(dest, src, len); | ||
456 | } | ||
457 | return ret; | ||
458 | } | ||
459 | |||
460 | int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) | 419 | int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) |
461 | { | 420 | { |
462 | struct sock *rtnl = net->rtnl; | 421 | struct sock *rtnl = net->rtnl; |
@@ -1411,9 +1370,6 @@ void __init rtnetlink_init(void) | |||
1411 | } | 1370 | } |
1412 | 1371 | ||
1413 | EXPORT_SYMBOL(__rta_fill); | 1372 | EXPORT_SYMBOL(__rta_fill); |
1414 | EXPORT_SYMBOL(rtattr_strlcpy); | ||
1415 | EXPORT_SYMBOL(rtattr_parse); | ||
1416 | EXPORT_SYMBOL(__rtattr_parse_nested_compat); | ||
1417 | EXPORT_SYMBOL(rtnetlink_put_metrics); | 1373 | EXPORT_SYMBOL(rtnetlink_put_metrics); |
1418 | EXPORT_SYMBOL(rtnl_lock); | 1374 | EXPORT_SYMBOL(rtnl_lock); |
1419 | EXPORT_SYMBOL(rtnl_trylock); | 1375 | EXPORT_SYMBOL(rtnl_trylock); |
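The rtnetlink.c hunks above remove the legacy rtattr_parse(), __rtattr_parse_nested_compat() and rtattr_strlcpy() helpers together with their exports; callers are expected to use the newer netlink attribute (nla_*) helpers instead. A hedged sketch of the equivalent nla_* usage, assuming nlmsg_parse() and nla_strlcpy() from the generic netlink attribute code, which are not part of this patch:

	/* Hedged sketch, not part of the patch: parsing link attributes with
	 * the nla_* helpers that supersede the removed rtattr_* functions. */
	#include <net/netlink.h>
	#include <linux/rtnetlink.h>

	static int example_parse_ifname(struct nlmsghdr *nlh, char *name, size_t len)
	{
		struct nlattr *tb[IFLA_MAX + 1];
		int err;

		err = nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, NULL);
		if (err < 0)
			return err;

		if (!tb[IFLA_IFNAME])
			return -EINVAL;

		nla_strlcpy(name, tb[IFLA_IFNAME], len);	/* like rtattr_strlcpy() */
		return 0;
	}
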
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 98420f9c4b6d..4e354221ec23 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -2461,6 +2461,34 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) | |||
2461 | return elt; | 2461 | return elt; |
2462 | } | 2462 | } |
2463 | 2463 | ||
2464 | /** | ||
2465 | * skb_partial_csum_set - set up and verify partial csum values for packet | ||
2466 | * @skb: the skb to set | ||
2467 | * @start: the number of bytes after skb->data to start checksumming. | ||
2468 | * @off: the offset from start to place the checksum. | ||
2469 | * | ||
2470 | * For untrusted partially-checksummed packets, we need to make sure the values | ||
2471 | * for skb->csum_start and skb->csum_offset are valid so we don't oops. | ||
2472 | * | ||
2473 | * This function checks and sets those values and skb->ip_summed: if this | ||
2474 | * returns false you should drop the packet. | ||
2475 | */ | ||
2476 | bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) | ||
2477 | { | ||
2478 | if (unlikely(start > skb->len - 2) || | ||
2479 | unlikely((int)start + off > skb->len - 2)) { | ||
2480 | if (net_ratelimit()) | ||
2481 | printk(KERN_WARNING | ||
2482 | "bad partial csum: csum=%u/%u len=%u\n", | ||
2483 | start, off, skb->len); | ||
2484 | return false; | ||
2485 | } | ||
2486 | skb->ip_summed = CHECKSUM_PARTIAL; | ||
2487 | skb->csum_start = skb_headroom(skb) + start; | ||
2488 | skb->csum_offset = off; | ||
2489 | return true; | ||
2490 | } | ||
2491 | |||
2464 | EXPORT_SYMBOL(___pskb_trim); | 2492 | EXPORT_SYMBOL(___pskb_trim); |
2465 | EXPORT_SYMBOL(__kfree_skb); | 2493 | EXPORT_SYMBOL(__kfree_skb); |
2466 | EXPORT_SYMBOL(kfree_skb); | 2494 | EXPORT_SYMBOL(kfree_skb); |
@@ -2497,3 +2525,4 @@ EXPORT_SYMBOL(skb_append_datato_frags); | |||
2497 | 2525 | ||
2498 | EXPORT_SYMBOL_GPL(skb_to_sgvec); | 2526 | EXPORT_SYMBOL_GPL(skb_to_sgvec); |
2499 | EXPORT_SYMBOL_GPL(skb_cow_data); | 2527 | EXPORT_SYMBOL_GPL(skb_cow_data); |
2528 | EXPORT_SYMBOL_GPL(skb_partial_csum_set); | ||
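The new skb_partial_csum_set() helper gives drivers that accept checksum-offload metadata from an untrusted source (a virtio guest, for example) one place to validate csum_start/csum_offset before the packet enters the stack. A hedged usage sketch; the receive function and its parameters below are illustrative, only skb_partial_csum_set() itself comes from the hunk above:

	/* Hedged sketch, not part of the patch: validating externally supplied
	 * partial-checksum offsets before handing the skb to the stack. */
	#include <linux/skbuff.h>
	#include <linux/netdevice.h>

	static int example_accept_partial_csum(struct sk_buff *skb,
					       u16 csum_start, u16 csum_offset)
	{
		if (!skb_partial_csum_set(skb, csum_start, csum_offset)) {
			kfree_skb(skb);		/* offsets point outside the packet */
			return -EINVAL;
		}

		/* skb->ip_summed is now CHECKSUM_PARTIAL with sane offsets. */
		netif_rx(skb);
		return 0;
	}
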
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index ebe59d98721a..287a62bc2e0f 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -271,8 +271,6 @@ extern struct sk_buff *dccp_make_response(struct sock *sk, | |||
271 | 271 | ||
272 | extern int dccp_connect(struct sock *sk); | 272 | extern int dccp_connect(struct sock *sk); |
273 | extern int dccp_disconnect(struct sock *sk, int flags); | 273 | extern int dccp_disconnect(struct sock *sk, int flags); |
274 | extern void dccp_hash(struct sock *sk); | ||
275 | extern void dccp_unhash(struct sock *sk); | ||
276 | extern int dccp_getsockopt(struct sock *sk, int level, int optname, | 274 | extern int dccp_getsockopt(struct sock *sk, int level, int optname, |
277 | char __user *optval, int __user *optlen); | 275 | char __user *optval, int __user *optlen); |
278 | extern int dccp_setsockopt(struct sock *sk, int level, int optname, | 276 | extern int dccp_setsockopt(struct sock *sk, int level, int optname, |
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index c982ad88223d..474075adbde4 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c | |||
@@ -38,12 +38,6 @@ | |||
38 | */ | 38 | */ |
39 | static struct socket *dccp_v4_ctl_socket; | 39 | static struct socket *dccp_v4_ctl_socket; |
40 | 40 | ||
41 | static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) | ||
42 | { | ||
43 | return inet_csk_get_port(&dccp_hashinfo, sk, snum, | ||
44 | inet_csk_bind_conflict); | ||
45 | } | ||
46 | |||
47 | int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | 41 | int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) |
48 | { | 42 | { |
49 | struct inet_sock *inet = inet_sk(sk); | 43 | struct inet_sock *inet = inet_sk(sk); |
@@ -408,8 +402,8 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
408 | 402 | ||
409 | dccp_sync_mss(newsk, dst_mtu(dst)); | 403 | dccp_sync_mss(newsk, dst_mtu(dst)); |
410 | 404 | ||
411 | __inet_hash_nolisten(&dccp_hashinfo, newsk); | 405 | __inet_hash_nolisten(newsk); |
412 | __inet_inherit_port(&dccp_hashinfo, sk, newsk); | 406 | __inet_inherit_port(sk, newsk); |
413 | 407 | ||
414 | return newsk; | 408 | return newsk; |
415 | 409 | ||
@@ -898,6 +892,7 @@ static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { | |||
898 | .getsockopt = ip_getsockopt, | 892 | .getsockopt = ip_getsockopt, |
899 | .addr2sockaddr = inet_csk_addr2sockaddr, | 893 | .addr2sockaddr = inet_csk_addr2sockaddr, |
900 | .sockaddr_len = sizeof(struct sockaddr_in), | 894 | .sockaddr_len = sizeof(struct sockaddr_in), |
895 | .bind_conflict = inet_csk_bind_conflict, | ||
901 | #ifdef CONFIG_COMPAT | 896 | #ifdef CONFIG_COMPAT |
902 | .compat_setsockopt = compat_ip_setsockopt, | 897 | .compat_setsockopt = compat_ip_setsockopt, |
903 | .compat_getsockopt = compat_ip_getsockopt, | 898 | .compat_getsockopt = compat_ip_getsockopt, |
@@ -937,10 +932,10 @@ static struct proto dccp_v4_prot = { | |||
937 | .sendmsg = dccp_sendmsg, | 932 | .sendmsg = dccp_sendmsg, |
938 | .recvmsg = dccp_recvmsg, | 933 | .recvmsg = dccp_recvmsg, |
939 | .backlog_rcv = dccp_v4_do_rcv, | 934 | .backlog_rcv = dccp_v4_do_rcv, |
940 | .hash = dccp_hash, | 935 | .hash = inet_hash, |
941 | .unhash = dccp_unhash, | 936 | .unhash = inet_unhash, |
942 | .accept = inet_csk_accept, | 937 | .accept = inet_csk_accept, |
943 | .get_port = dccp_v4_get_port, | 938 | .get_port = inet_csk_get_port, |
944 | .shutdown = dccp_shutdown, | 939 | .shutdown = dccp_shutdown, |
945 | .destroy = dccp_destroy_sock, | 940 | .destroy = dccp_destroy_sock, |
946 | .orphan_count = &dccp_orphan_count, | 941 | .orphan_count = &dccp_orphan_count, |
@@ -948,6 +943,7 @@ static struct proto dccp_v4_prot = { | |||
948 | .obj_size = sizeof(struct dccp_sock), | 943 | .obj_size = sizeof(struct dccp_sock), |
949 | .rsk_prot = &dccp_request_sock_ops, | 944 | .rsk_prot = &dccp_request_sock_ops, |
950 | .twsk_prot = &dccp_timewait_sock_ops, | 945 | .twsk_prot = &dccp_timewait_sock_ops, |
946 | .hashinfo = &dccp_hashinfo, | ||
951 | #ifdef CONFIG_COMPAT | 947 | #ifdef CONFIG_COMPAT |
952 | .compat_setsockopt = compat_dccp_setsockopt, | 948 | .compat_setsockopt = compat_dccp_setsockopt, |
953 | .compat_getsockopt = compat_dccp_getsockopt, | 949 | .compat_getsockopt = compat_dccp_getsockopt, |
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ed0a0053a797..490333d47c7b 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c | |||
@@ -39,21 +39,15 @@ static struct socket *dccp_v6_ctl_socket; | |||
39 | static struct inet_connection_sock_af_ops dccp_ipv6_mapped; | 39 | static struct inet_connection_sock_af_ops dccp_ipv6_mapped; |
40 | static struct inet_connection_sock_af_ops dccp_ipv6_af_ops; | 40 | static struct inet_connection_sock_af_ops dccp_ipv6_af_ops; |
41 | 41 | ||
42 | static int dccp_v6_get_port(struct sock *sk, unsigned short snum) | ||
43 | { | ||
44 | return inet_csk_get_port(&dccp_hashinfo, sk, snum, | ||
45 | inet6_csk_bind_conflict); | ||
46 | } | ||
47 | |||
48 | static void dccp_v6_hash(struct sock *sk) | 42 | static void dccp_v6_hash(struct sock *sk) |
49 | { | 43 | { |
50 | if (sk->sk_state != DCCP_CLOSED) { | 44 | if (sk->sk_state != DCCP_CLOSED) { |
51 | if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) { | 45 | if (inet_csk(sk)->icsk_af_ops == &dccp_ipv6_mapped) { |
52 | dccp_hash(sk); | 46 | inet_hash(sk); |
53 | return; | 47 | return; |
54 | } | 48 | } |
55 | local_bh_disable(); | 49 | local_bh_disable(); |
56 | __inet6_hash(&dccp_hashinfo, sk); | 50 | __inet6_hash(sk); |
57 | local_bh_enable(); | 51 | local_bh_enable(); |
58 | } | 52 | } |
59 | } | 53 | } |
@@ -630,8 +624,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, | |||
630 | 624 | ||
631 | newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; | 625 | newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6; |
632 | 626 | ||
633 | __inet6_hash(&dccp_hashinfo, newsk); | 627 | __inet6_hash(newsk); |
634 | inet_inherit_port(&dccp_hashinfo, sk, newsk); | 628 | inet_inherit_port(sk, newsk); |
635 | 629 | ||
636 | return newsk; | 630 | return newsk; |
637 | 631 | ||
@@ -1054,6 +1048,7 @@ static struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { | |||
1054 | .getsockopt = ipv6_getsockopt, | 1048 | .getsockopt = ipv6_getsockopt, |
1055 | .addr2sockaddr = inet6_csk_addr2sockaddr, | 1049 | .addr2sockaddr = inet6_csk_addr2sockaddr, |
1056 | .sockaddr_len = sizeof(struct sockaddr_in6), | 1050 | .sockaddr_len = sizeof(struct sockaddr_in6), |
1051 | .bind_conflict = inet6_csk_bind_conflict, | ||
1057 | #ifdef CONFIG_COMPAT | 1052 | #ifdef CONFIG_COMPAT |
1058 | .compat_setsockopt = compat_ipv6_setsockopt, | 1053 | .compat_setsockopt = compat_ipv6_setsockopt, |
1059 | .compat_getsockopt = compat_ipv6_getsockopt, | 1054 | .compat_getsockopt = compat_ipv6_getsockopt, |
@@ -1123,9 +1118,9 @@ static struct proto dccp_v6_prot = { | |||
1123 | .recvmsg = dccp_recvmsg, | 1118 | .recvmsg = dccp_recvmsg, |
1124 | .backlog_rcv = dccp_v6_do_rcv, | 1119 | .backlog_rcv = dccp_v6_do_rcv, |
1125 | .hash = dccp_v6_hash, | 1120 | .hash = dccp_v6_hash, |
1126 | .unhash = dccp_unhash, | 1121 | .unhash = inet_unhash, |
1127 | .accept = inet_csk_accept, | 1122 | .accept = inet_csk_accept, |
1128 | .get_port = dccp_v6_get_port, | 1123 | .get_port = inet_csk_get_port, |
1129 | .shutdown = dccp_shutdown, | 1124 | .shutdown = dccp_shutdown, |
1130 | .destroy = dccp_v6_destroy_sock, | 1125 | .destroy = dccp_v6_destroy_sock, |
1131 | .orphan_count = &dccp_orphan_count, | 1126 | .orphan_count = &dccp_orphan_count, |
@@ -1133,6 +1128,7 @@ static struct proto dccp_v6_prot = { | |||
1133 | .obj_size = sizeof(struct dccp6_sock), | 1128 | .obj_size = sizeof(struct dccp6_sock), |
1134 | .rsk_prot = &dccp6_request_sock_ops, | 1129 | .rsk_prot = &dccp6_request_sock_ops, |
1135 | .twsk_prot = &dccp6_timewait_sock_ops, | 1130 | .twsk_prot = &dccp6_timewait_sock_ops, |
1131 | .hashinfo = &dccp_hashinfo, | ||
1136 | #ifdef CONFIG_COMPAT | 1132 | #ifdef CONFIG_COMPAT |
1137 | .compat_setsockopt = compat_dccp_setsockopt, | 1133 | .compat_setsockopt = compat_dccp_setsockopt, |
1138 | .compat_getsockopt = compat_dccp_getsockopt, | 1134 | .compat_getsockopt = compat_dccp_getsockopt, |
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 0bed4a6095b7..e3f5d37b84be 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c | |||
@@ -78,7 +78,7 @@ void dccp_set_state(struct sock *sk, const int state) | |||
78 | sk->sk_prot->unhash(sk); | 78 | sk->sk_prot->unhash(sk); |
79 | if (inet_csk(sk)->icsk_bind_hash != NULL && | 79 | if (inet_csk(sk)->icsk_bind_hash != NULL && |
80 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) | 80 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) |
81 | inet_put_port(&dccp_hashinfo, sk); | 81 | inet_put_port(sk); |
82 | /* fall through */ | 82 | /* fall through */ |
83 | default: | 83 | default: |
84 | if (oldstate == DCCP_OPEN) | 84 | if (oldstate == DCCP_OPEN) |
@@ -173,20 +173,6 @@ const char *dccp_state_name(const int state) | |||
173 | 173 | ||
174 | EXPORT_SYMBOL_GPL(dccp_state_name); | 174 | EXPORT_SYMBOL_GPL(dccp_state_name); |
175 | 175 | ||
176 | void dccp_hash(struct sock *sk) | ||
177 | { | ||
178 | inet_hash(&dccp_hashinfo, sk); | ||
179 | } | ||
180 | |||
181 | EXPORT_SYMBOL_GPL(dccp_hash); | ||
182 | |||
183 | void dccp_unhash(struct sock *sk) | ||
184 | { | ||
185 | inet_unhash(&dccp_hashinfo, sk); | ||
186 | } | ||
187 | |||
188 | EXPORT_SYMBOL_GPL(dccp_unhash); | ||
189 | |||
190 | int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) | 176 | int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) |
191 | { | 177 | { |
192 | struct dccp_sock *dp = dccp_sk(sk); | 178 | struct dccp_sock *dp = dccp_sk(sk); |
@@ -268,7 +254,7 @@ int dccp_destroy_sock(struct sock *sk) | |||
268 | 254 | ||
269 | /* Clean up a referenced DCCP bind bucket. */ | 255 | /* Clean up a referenced DCCP bind bucket. */ |
270 | if (inet_csk(sk)->icsk_bind_hash != NULL) | 256 | if (inet_csk(sk)->icsk_bind_hash != NULL) |
271 | inet_put_port(&dccp_hashinfo, sk); | 257 | inet_put_port(sk); |
272 | 258 | ||
273 | kfree(dp->dccps_service_list); | 259 | kfree(dp->dccps_service_list); |
274 | dp->dccps_service_list = NULL; | 260 | dp->dccps_service_list = NULL; |
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index a2241060113b..8cd357f41283 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -547,8 +547,8 @@ int cipso_v4_doi_remove(u32 doi, | |||
547 | rcu_read_lock(); | 547 | rcu_read_lock(); |
548 | list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) | 548 | list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) |
549 | if (dom_iter->valid) | 549 | if (dom_iter->valid) |
550 | netlbl_domhsh_remove(dom_iter->domain, | 550 | netlbl_cfg_map_del(dom_iter->domain, |
551 | audit_info); | 551 | audit_info); |
552 | rcu_read_unlock(); | 552 | rcu_read_unlock(); |
553 | cipso_v4_cache_invalidate(); | 553 | cipso_v4_cache_invalidate(); |
554 | call_rcu(&doi_def->rcu, callback); | 554 | call_rcu(&doi_def->rcu, callback); |
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 35851c96bdfb..f5fba3f71c06 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c | |||
@@ -2431,8 +2431,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) | |||
2431 | rtn_type(buf2, sizeof(buf2), | 2431 | rtn_type(buf2, sizeof(buf2), |
2432 | fa->fa_type)); | 2432 | fa->fa_type)); |
2433 | if (fa->fa_tos) | 2433 | if (fa->fa_tos) |
2434 | seq_printf(seq, "tos =%d\n", | 2434 | seq_printf(seq, " tos=%d", fa->fa_tos); |
2435 | fa->fa_tos); | ||
2436 | seq_putc(seq, '\n'); | 2435 | seq_putc(seq, '\n'); |
2437 | } | 2436 | } |
2438 | } | 2437 | } |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a7321a82df6d..a13c074dac09 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -1015,7 +1015,8 @@ int icmp_rcv(struct sk_buff *skb) | |||
1015 | goto error; | 1015 | goto error; |
1016 | } | 1016 | } |
1017 | 1017 | ||
1018 | __skb_pull(skb, sizeof(*icmph)); | 1018 | if (!pskb_pull(skb, sizeof(*icmph))) |
1019 | goto error; | ||
1019 | 1020 | ||
1020 | icmph = icmp_hdr(skb); | 1021 | icmph = icmp_hdr(skb); |
1021 | 1022 | ||
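The icmp_rcv() change above (and the matching icmpv6_rcv() change further down) replaces __skb_pull(), which assumes the bytes are already linear and has no failure return, with pskb_pull(), which may have to bring paged data into the head and returns NULL when the data cannot be made available. The pattern that goes with it is to check the return value and refetch any header pointer, since the head may have been reallocated; a hedged sketch:

	/* Hedged sketch, not part of the patch: the pskb_pull() pattern used
	 * by the icmp_rcv() fix above. */
	#include <linux/skbuff.h>
	#include <linux/icmp.h>

	static int example_pull_icmp_header(struct sk_buff *skb)
	{
		struct icmphdr *icmph;

		if (!pskb_pull(skb, sizeof(*icmph)))
			return -EINVAL;			/* truncated packet */

		icmph = icmp_hdr(skb);			/* refetch after the pull */
		/* ... dispatch on icmph->type ... */
		return 0;
	}
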
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index de5a41de191a..b189278c7bc1 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c | |||
@@ -78,11 +78,9 @@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); | |||
78 | /* Obtain a reference to a local port for the given sock, | 78 | /* Obtain a reference to a local port for the given sock, |
79 | * if snum is zero it means select any available local port. | 79 | * if snum is zero it means select any available local port. |
80 | */ | 80 | */ |
81 | int inet_csk_get_port(struct inet_hashinfo *hashinfo, | 81 | int inet_csk_get_port(struct sock *sk, unsigned short snum) |
82 | struct sock *sk, unsigned short snum, | ||
83 | int (*bind_conflict)(const struct sock *sk, | ||
84 | const struct inet_bind_bucket *tb)) | ||
85 | { | 82 | { |
83 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | ||
86 | struct inet_bind_hashbucket *head; | 84 | struct inet_bind_hashbucket *head; |
87 | struct hlist_node *node; | 85 | struct hlist_node *node; |
88 | struct inet_bind_bucket *tb; | 86 | struct inet_bind_bucket *tb; |
@@ -142,7 +140,7 @@ tb_found: | |||
142 | goto success; | 140 | goto success; |
143 | } else { | 141 | } else { |
144 | ret = 1; | 142 | ret = 1; |
145 | if (bind_conflict(sk, tb)) | 143 | if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) |
146 | goto fail_unlock; | 144 | goto fail_unlock; |
147 | } | 145 | } |
148 | } | 146 | } |
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 48d45008f749..9cac6c034abd 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -66,8 +66,9 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, | |||
66 | /* | 66 | /* |
67 | * Get rid of any references to a local port held by the given sock. | 67 | * Get rid of any references to a local port held by the given sock. |
68 | */ | 68 | */ |
69 | static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) | 69 | static void __inet_put_port(struct sock *sk) |
70 | { | 70 | { |
71 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | ||
71 | const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); | 72 | const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); |
72 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; | 73 | struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; |
73 | struct inet_bind_bucket *tb; | 74 | struct inet_bind_bucket *tb; |
@@ -81,10 +82,10 @@ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) | |||
81 | spin_unlock(&head->lock); | 82 | spin_unlock(&head->lock); |
82 | } | 83 | } |
83 | 84 | ||
84 | void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) | 85 | void inet_put_port(struct sock *sk) |
85 | { | 86 | { |
86 | local_bh_disable(); | 87 | local_bh_disable(); |
87 | __inet_put_port(hashinfo, sk); | 88 | __inet_put_port(sk); |
88 | local_bh_enable(); | 89 | local_bh_enable(); |
89 | } | 90 | } |
90 | 91 | ||
@@ -317,8 +318,9 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) | |||
317 | inet->dport); | 318 | inet->dport); |
318 | } | 319 | } |
319 | 320 | ||
320 | void __inet_hash_nolisten(struct inet_hashinfo *hashinfo, struct sock *sk) | 321 | void __inet_hash_nolisten(struct sock *sk) |
321 | { | 322 | { |
323 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | ||
322 | struct hlist_head *list; | 324 | struct hlist_head *list; |
323 | rwlock_t *lock; | 325 | rwlock_t *lock; |
324 | struct inet_ehash_bucket *head; | 326 | struct inet_ehash_bucket *head; |
@@ -337,13 +339,14 @@ void __inet_hash_nolisten(struct inet_hashinfo *hashinfo, struct sock *sk) | |||
337 | } | 339 | } |
338 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); | 340 | EXPORT_SYMBOL_GPL(__inet_hash_nolisten); |
339 | 341 | ||
340 | void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) | 342 | static void __inet_hash(struct sock *sk) |
341 | { | 343 | { |
344 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | ||
342 | struct hlist_head *list; | 345 | struct hlist_head *list; |
343 | rwlock_t *lock; | 346 | rwlock_t *lock; |
344 | 347 | ||
345 | if (sk->sk_state != TCP_LISTEN) { | 348 | if (sk->sk_state != TCP_LISTEN) { |
346 | __inet_hash_nolisten(hashinfo, sk); | 349 | __inet_hash_nolisten(sk); |
347 | return; | 350 | return; |
348 | } | 351 | } |
349 | 352 | ||
@@ -357,13 +360,48 @@ void __inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) | |||
357 | write_unlock(lock); | 360 | write_unlock(lock); |
358 | wake_up(&hashinfo->lhash_wait); | 361 | wake_up(&hashinfo->lhash_wait); |
359 | } | 362 | } |
360 | EXPORT_SYMBOL_GPL(__inet_hash); | 363 | |
364 | void inet_hash(struct sock *sk) | ||
365 | { | ||
366 | if (sk->sk_state != TCP_CLOSE) { | ||
367 | local_bh_disable(); | ||
368 | __inet_hash(sk); | ||
369 | local_bh_enable(); | ||
370 | } | ||
371 | } | ||
372 | EXPORT_SYMBOL_GPL(inet_hash); | ||
373 | |||
374 | void inet_unhash(struct sock *sk) | ||
375 | { | ||
376 | rwlock_t *lock; | ||
377 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | ||
378 | |||
379 | if (sk_unhashed(sk)) | ||
380 | goto out; | ||
381 | |||
382 | if (sk->sk_state == TCP_LISTEN) { | ||
383 | local_bh_disable(); | ||
384 | inet_listen_wlock(hashinfo); | ||
385 | lock = &hashinfo->lhash_lock; | ||
386 | } else { | ||
387 | lock = inet_ehash_lockp(hashinfo, sk->sk_hash); | ||
388 | write_lock_bh(lock); | ||
389 | } | ||
390 | |||
391 | if (__sk_del_node_init(sk)) | ||
392 | sock_prot_inuse_add(sk->sk_prot, -1); | ||
393 | write_unlock_bh(lock); | ||
394 | out: | ||
395 | if (sk->sk_state == TCP_LISTEN) | ||
396 | wake_up(&hashinfo->lhash_wait); | ||
397 | } | ||
398 | EXPORT_SYMBOL_GPL(inet_unhash); | ||
361 | 399 | ||
362 | int __inet_hash_connect(struct inet_timewait_death_row *death_row, | 400 | int __inet_hash_connect(struct inet_timewait_death_row *death_row, |
363 | struct sock *sk, | 401 | struct sock *sk, u32 port_offset, |
364 | int (*check_established)(struct inet_timewait_death_row *, | 402 | int (*check_established)(struct inet_timewait_death_row *, |
365 | struct sock *, __u16, struct inet_timewait_sock **), | 403 | struct sock *, __u16, struct inet_timewait_sock **), |
366 | void (*hash)(struct inet_hashinfo *, struct sock *)) | 404 | void (*hash)(struct sock *sk)) |
367 | { | 405 | { |
368 | struct inet_hashinfo *hinfo = death_row->hashinfo; | 406 | struct inet_hashinfo *hinfo = death_row->hashinfo; |
369 | const unsigned short snum = inet_sk(sk)->num; | 407 | const unsigned short snum = inet_sk(sk)->num; |
@@ -375,7 +413,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
375 | if (!snum) { | 413 | if (!snum) { |
376 | int i, remaining, low, high, port; | 414 | int i, remaining, low, high, port; |
377 | static u32 hint; | 415 | static u32 hint; |
378 | u32 offset = hint + inet_sk_port_offset(sk); | 416 | u32 offset = hint + port_offset; |
379 | struct hlist_node *node; | 417 | struct hlist_node *node; |
380 | struct inet_timewait_sock *tw = NULL; | 418 | struct inet_timewait_sock *tw = NULL; |
381 | 419 | ||
@@ -427,7 +465,7 @@ ok: | |||
427 | inet_bind_hash(sk, tb, port); | 465 | inet_bind_hash(sk, tb, port); |
428 | if (sk_unhashed(sk)) { | 466 | if (sk_unhashed(sk)) { |
429 | inet_sk(sk)->sport = htons(port); | 467 | inet_sk(sk)->sport = htons(port); |
430 | hash(hinfo, sk); | 468 | hash(sk); |
431 | } | 469 | } |
432 | spin_unlock(&head->lock); | 470 | spin_unlock(&head->lock); |
433 | 471 | ||
@@ -444,7 +482,7 @@ ok: | |||
444 | tb = inet_csk(sk)->icsk_bind_hash; | 482 | tb = inet_csk(sk)->icsk_bind_hash; |
445 | spin_lock_bh(&head->lock); | 483 | spin_lock_bh(&head->lock); |
446 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 484 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
447 | hash(hinfo, sk); | 485 | hash(sk); |
448 | spin_unlock_bh(&head->lock); | 486 | spin_unlock_bh(&head->lock); |
449 | return 0; | 487 | return 0; |
450 | } else { | 488 | } else { |
@@ -464,7 +502,7 @@ EXPORT_SYMBOL_GPL(__inet_hash_connect); | |||
464 | int inet_hash_connect(struct inet_timewait_death_row *death_row, | 502 | int inet_hash_connect(struct inet_timewait_death_row *death_row, |
465 | struct sock *sk) | 503 | struct sock *sk) |
466 | { | 504 | { |
467 | return __inet_hash_connect(death_row, sk, | 505 | return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk), |
468 | __inet_check_established, __inet_hash_nolisten); | 506 | __inet_check_established, __inet_hash_nolisten); |
469 | } | 507 | } |
470 | 508 | ||
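With inet_hash(), inet_unhash() and inet_csk_get_port() now locating the hash table through sk->sk_prot->hashinfo, and the bind-conflict check coming from icsk_af_ops->bind_conflict, the per-protocol wrappers removed elsewhere in this patch (tcp_v4_get_port(), tcp_v4_hash(), dccp_hash(), dccp_unhash(), ...) are no longer needed; a protocol only has to point its struct proto and af_ops at the right places, as the TCP and DCCP hunks in this series do. A hedged, abbreviated sketch of that wiring; only the hashing-related fields are shown and the "example_*" names are placeholders:

	/* Hedged sketch, not part of the patch: the struct proto fields the
	 * generic hashing helpers rely on.  Real protocols set many more
	 * fields; "example_prot" and "example_hashinfo" are placeholder names. */
	#include <net/sock.h>
	#include <net/inet_hashtables.h>
	#include <net/inet_connection_sock.h>

	extern struct inet_hashinfo example_hashinfo;

	static struct proto example_prot = {
		.name		= "EXAMPLE",
		.hash		= inet_hash,		/* generic, via sk_prot->hashinfo */
		.unhash		= inet_unhash,
		.get_port	= inet_csk_get_port,	/* bind_conflict via icsk_af_ops */
		.hashinfo	= &example_hashinfo,
	};
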
diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/ipv4/ipvs/ip_vs_wrr.c index 749fa044eca5..85c680add6df 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/ipv4/ipvs/ip_vs_wrr.c | |||
@@ -22,6 +22,7 @@ | |||
22 | 22 | ||
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/net.h> | ||
25 | 26 | ||
26 | #include <net/ip_vs.h> | 27 | #include <net/ip_vs.h> |
27 | 28 | ||
@@ -169,7 +170,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | |||
169 | */ | 170 | */ |
170 | if (mark->cw == 0) { | 171 | if (mark->cw == 0) { |
171 | mark->cl = &svc->destinations; | 172 | mark->cl = &svc->destinations; |
172 | IP_VS_INFO("ip_vs_wrr_schedule(): " | 173 | IP_VS_ERR_RL("ip_vs_wrr_schedule(): " |
173 | "no available servers\n"); | 174 | "no available servers\n"); |
174 | dest = NULL; | 175 | dest = NULL; |
175 | goto out; | 176 | goto out; |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a0d373bd9065..071e83a894ad 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -1669,7 +1669,7 @@ void tcp_set_state(struct sock *sk, int state) | |||
1669 | sk->sk_prot->unhash(sk); | 1669 | sk->sk_prot->unhash(sk); |
1670 | if (inet_csk(sk)->icsk_bind_hash && | 1670 | if (inet_csk(sk)->icsk_bind_hash && |
1671 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) | 1671 | !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) |
1672 | inet_put_port(&tcp_hashinfo, sk); | 1672 | inet_put_port(sk); |
1673 | /* fall through */ | 1673 | /* fall through */ |
1674 | default: | 1674 | default: |
1675 | if (oldstate==TCP_ESTABLISHED) | 1675 | if (oldstate==TCP_ESTABLISHED) |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 77c1939a2b0d..63414ea427c5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -108,22 +108,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { | |||
108 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), | 108 | .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), |
109 | }; | 109 | }; |
110 | 110 | ||
111 | static int tcp_v4_get_port(struct sock *sk, unsigned short snum) | ||
112 | { | ||
113 | return inet_csk_get_port(&tcp_hashinfo, sk, snum, | ||
114 | inet_csk_bind_conflict); | ||
115 | } | ||
116 | |||
117 | static void tcp_v4_hash(struct sock *sk) | ||
118 | { | ||
119 | inet_hash(&tcp_hashinfo, sk); | ||
120 | } | ||
121 | |||
122 | void tcp_unhash(struct sock *sk) | ||
123 | { | ||
124 | inet_unhash(&tcp_hashinfo, sk); | ||
125 | } | ||
126 | |||
127 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) | 111 | static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) |
128 | { | 112 | { |
129 | return secure_tcp_sequence_number(ip_hdr(skb)->daddr, | 113 | return secure_tcp_sequence_number(ip_hdr(skb)->daddr, |
@@ -1478,8 +1462,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1478 | } | 1462 | } |
1479 | #endif | 1463 | #endif |
1480 | 1464 | ||
1481 | __inet_hash_nolisten(&tcp_hashinfo, newsk); | 1465 | __inet_hash_nolisten(newsk); |
1482 | __inet_inherit_port(&tcp_hashinfo, sk, newsk); | 1466 | __inet_inherit_port(sk, newsk); |
1483 | 1467 | ||
1484 | return newsk; | 1468 | return newsk; |
1485 | 1469 | ||
@@ -1827,6 +1811,7 @@ struct inet_connection_sock_af_ops ipv4_specific = { | |||
1827 | .getsockopt = ip_getsockopt, | 1811 | .getsockopt = ip_getsockopt, |
1828 | .addr2sockaddr = inet_csk_addr2sockaddr, | 1812 | .addr2sockaddr = inet_csk_addr2sockaddr, |
1829 | .sockaddr_len = sizeof(struct sockaddr_in), | 1813 | .sockaddr_len = sizeof(struct sockaddr_in), |
1814 | .bind_conflict = inet_csk_bind_conflict, | ||
1830 | #ifdef CONFIG_COMPAT | 1815 | #ifdef CONFIG_COMPAT |
1831 | .compat_setsockopt = compat_ip_setsockopt, | 1816 | .compat_setsockopt = compat_ip_setsockopt, |
1832 | .compat_getsockopt = compat_ip_getsockopt, | 1817 | .compat_getsockopt = compat_ip_getsockopt, |
@@ -1926,7 +1911,7 @@ int tcp_v4_destroy_sock(struct sock *sk) | |||
1926 | 1911 | ||
1927 | /* Clean up a referenced TCP bind bucket. */ | 1912 | /* Clean up a referenced TCP bind bucket. */ |
1928 | if (inet_csk(sk)->icsk_bind_hash) | 1913 | if (inet_csk(sk)->icsk_bind_hash) |
1929 | inet_put_port(&tcp_hashinfo, sk); | 1914 | inet_put_port(sk); |
1930 | 1915 | ||
1931 | /* | 1916 | /* |
1932 | * If sendmsg cached page exists, toss it. | 1917 | * If sendmsg cached page exists, toss it. |
@@ -2435,9 +2420,9 @@ struct proto tcp_prot = { | |||
2435 | .getsockopt = tcp_getsockopt, | 2420 | .getsockopt = tcp_getsockopt, |
2436 | .recvmsg = tcp_recvmsg, | 2421 | .recvmsg = tcp_recvmsg, |
2437 | .backlog_rcv = tcp_v4_do_rcv, | 2422 | .backlog_rcv = tcp_v4_do_rcv, |
2438 | .hash = tcp_v4_hash, | 2423 | .hash = inet_hash, |
2439 | .unhash = tcp_unhash, | 2424 | .unhash = inet_unhash, |
2440 | .get_port = tcp_v4_get_port, | 2425 | .get_port = inet_csk_get_port, |
2441 | .enter_memory_pressure = tcp_enter_memory_pressure, | 2426 | .enter_memory_pressure = tcp_enter_memory_pressure, |
2442 | .sockets_allocated = &tcp_sockets_allocated, | 2427 | .sockets_allocated = &tcp_sockets_allocated, |
2443 | .orphan_count = &tcp_orphan_count, | 2428 | .orphan_count = &tcp_orphan_count, |
@@ -2450,6 +2435,7 @@ struct proto tcp_prot = { | |||
2450 | .obj_size = sizeof(struct tcp_sock), | 2435 | .obj_size = sizeof(struct tcp_sock), |
2451 | .twsk_prot = &tcp_timewait_sock_ops, | 2436 | .twsk_prot = &tcp_timewait_sock_ops, |
2452 | .rsk_prot = &tcp_request_sock_ops, | 2437 | .rsk_prot = &tcp_request_sock_ops, |
2438 | .hashinfo = &tcp_hashinfo, | ||
2453 | #ifdef CONFIG_COMPAT | 2439 | #ifdef CONFIG_COMPAT |
2454 | .compat_setsockopt = compat_tcp_setsockopt, | 2440 | .compat_setsockopt = compat_tcp_setsockopt, |
2455 | .compat_getsockopt = compat_tcp_getsockopt, | 2441 | .compat_getsockopt = compat_tcp_getsockopt, |
@@ -2467,7 +2453,6 @@ void __init tcp_v4_init(struct net_proto_family *ops) | |||
2467 | EXPORT_SYMBOL(ipv4_specific); | 2453 | EXPORT_SYMBOL(ipv4_specific); |
2468 | EXPORT_SYMBOL(tcp_hashinfo); | 2454 | EXPORT_SYMBOL(tcp_hashinfo); |
2469 | EXPORT_SYMBOL(tcp_prot); | 2455 | EXPORT_SYMBOL(tcp_prot); |
2470 | EXPORT_SYMBOL(tcp_unhash); | ||
2471 | EXPORT_SYMBOL(tcp_v4_conn_request); | 2456 | EXPORT_SYMBOL(tcp_v4_conn_request); |
2472 | EXPORT_SYMBOL(tcp_v4_connect); | 2457 | EXPORT_SYMBOL(tcp_v4_connect); |
2473 | EXPORT_SYMBOL(tcp_v4_do_rcv); | 2458 | EXPORT_SYMBOL(tcp_v4_do_rcv); |
diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index e093a7b59e18..b47030ba162b 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c | |||
@@ -102,7 +102,7 @@ static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb) | |||
102 | 102 | ||
103 | XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr; | 103 | XFRM_MODE_SKB_CB(skb)->protocol = ph->nexthdr; |
104 | 104 | ||
105 | if (!pskb_may_pull(skb, phlen)); | 105 | if (!pskb_may_pull(skb, phlen)) |
106 | goto out; | 106 | goto out; |
107 | __skb_pull(skb, phlen); | 107 | __skb_pull(skb, phlen); |
108 | } | 108 | } |
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index cbb5b9cf84ad..121d517bf91c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c | |||
@@ -683,7 +683,8 @@ static int icmpv6_rcv(struct sk_buff *skb) | |||
683 | } | 683 | } |
684 | } | 684 | } |
685 | 685 | ||
686 | __skb_pull(skb, sizeof(*hdr)); | 686 | if (!pskb_pull(skb, sizeof(*hdr))) |
687 | goto discard_it; | ||
687 | 688 | ||
688 | hdr = icmp6_hdr(skb); | 689 | hdr = icmp6_hdr(skb); |
689 | 690 | ||
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d325a9958909..99fd25f7f005 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -22,9 +22,9 @@ | |||
22 | #include <net/inet6_hashtables.h> | 22 | #include <net/inet6_hashtables.h> |
23 | #include <net/ip.h> | 23 | #include <net/ip.h> |
24 | 24 | ||
25 | void __inet6_hash(struct inet_hashinfo *hashinfo, | 25 | void __inet6_hash(struct sock *sk) |
26 | struct sock *sk) | ||
27 | { | 26 | { |
27 | struct inet_hashinfo *hashinfo = sk->sk_prot->hashinfo; | ||
28 | struct hlist_head *list; | 28 | struct hlist_head *list; |
29 | rwlock_t *lock; | 29 | rwlock_t *lock; |
30 | 30 | ||
@@ -236,7 +236,7 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk) | |||
236 | int inet6_hash_connect(struct inet_timewait_death_row *death_row, | 236 | int inet6_hash_connect(struct inet_timewait_death_row *death_row, |
237 | struct sock *sk) | 237 | struct sock *sk) |
238 | { | 238 | { |
239 | return __inet_hash_connect(death_row, sk, | 239 | return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk), |
240 | __inet6_check_established, __inet6_hash); | 240 | __inet6_check_established, __inet6_hash); |
241 | } | 241 | } |
242 | 242 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 59d0029e93a7..12750f2b05ab 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -86,12 +86,6 @@ static struct tcp_sock_af_ops tcp_sock_ipv6_specific; | |||
86 | static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; | 86 | static struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; |
87 | #endif | 87 | #endif |
88 | 88 | ||
89 | static int tcp_v6_get_port(struct sock *sk, unsigned short snum) | ||
90 | { | ||
91 | return inet_csk_get_port(&tcp_hashinfo, sk, snum, | ||
92 | inet6_csk_bind_conflict); | ||
93 | } | ||
94 | |||
95 | static void tcp_v6_hash(struct sock *sk) | 89 | static void tcp_v6_hash(struct sock *sk) |
96 | { | 90 | { |
97 | if (sk->sk_state != TCP_CLOSE) { | 91 | if (sk->sk_state != TCP_CLOSE) { |
@@ -100,7 +94,7 @@ static void tcp_v6_hash(struct sock *sk) | |||
100 | return; | 94 | return; |
101 | } | 95 | } |
102 | local_bh_disable(); | 96 | local_bh_disable(); |
103 | __inet6_hash(&tcp_hashinfo, sk); | 97 | __inet6_hash(sk); |
104 | local_bh_enable(); | 98 | local_bh_enable(); |
105 | } | 99 | } |
106 | } | 100 | } |
@@ -1504,8 +1498,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, | |||
1504 | } | 1498 | } |
1505 | #endif | 1499 | #endif |
1506 | 1500 | ||
1507 | __inet6_hash(&tcp_hashinfo, newsk); | 1501 | __inet6_hash(newsk); |
1508 | inet_inherit_port(&tcp_hashinfo, sk, newsk); | 1502 | inet_inherit_port(sk, newsk); |
1509 | 1503 | ||
1510 | return newsk; | 1504 | return newsk; |
1511 | 1505 | ||
@@ -1833,6 +1827,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = { | |||
1833 | .getsockopt = ipv6_getsockopt, | 1827 | .getsockopt = ipv6_getsockopt, |
1834 | .addr2sockaddr = inet6_csk_addr2sockaddr, | 1828 | .addr2sockaddr = inet6_csk_addr2sockaddr, |
1835 | .sockaddr_len = sizeof(struct sockaddr_in6), | 1829 | .sockaddr_len = sizeof(struct sockaddr_in6), |
1830 | .bind_conflict = inet6_csk_bind_conflict, | ||
1836 | #ifdef CONFIG_COMPAT | 1831 | #ifdef CONFIG_COMPAT |
1837 | .compat_setsockopt = compat_ipv6_setsockopt, | 1832 | .compat_setsockopt = compat_ipv6_setsockopt, |
1838 | .compat_getsockopt = compat_ipv6_getsockopt, | 1833 | .compat_getsockopt = compat_ipv6_getsockopt, |
@@ -1864,6 +1859,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = { | |||
1864 | .getsockopt = ipv6_getsockopt, | 1859 | .getsockopt = ipv6_getsockopt, |
1865 | .addr2sockaddr = inet6_csk_addr2sockaddr, | 1860 | .addr2sockaddr = inet6_csk_addr2sockaddr, |
1866 | .sockaddr_len = sizeof(struct sockaddr_in6), | 1861 | .sockaddr_len = sizeof(struct sockaddr_in6), |
1862 | .bind_conflict = inet6_csk_bind_conflict, | ||
1867 | #ifdef CONFIG_COMPAT | 1863 | #ifdef CONFIG_COMPAT |
1868 | .compat_setsockopt = compat_ipv6_setsockopt, | 1864 | .compat_setsockopt = compat_ipv6_setsockopt, |
1869 | .compat_getsockopt = compat_ipv6_getsockopt, | 1865 | .compat_getsockopt = compat_ipv6_getsockopt, |
@@ -2127,8 +2123,8 @@ struct proto tcpv6_prot = { | |||
2127 | .recvmsg = tcp_recvmsg, | 2123 | .recvmsg = tcp_recvmsg, |
2128 | .backlog_rcv = tcp_v6_do_rcv, | 2124 | .backlog_rcv = tcp_v6_do_rcv, |
2129 | .hash = tcp_v6_hash, | 2125 | .hash = tcp_v6_hash, |
2130 | .unhash = tcp_unhash, | 2126 | .unhash = inet_unhash, |
2131 | .get_port = tcp_v6_get_port, | 2127 | .get_port = inet_csk_get_port, |
2132 | .enter_memory_pressure = tcp_enter_memory_pressure, | 2128 | .enter_memory_pressure = tcp_enter_memory_pressure, |
2133 | .sockets_allocated = &tcp_sockets_allocated, | 2129 | .sockets_allocated = &tcp_sockets_allocated, |
2134 | .memory_allocated = &tcp_memory_allocated, | 2130 | .memory_allocated = &tcp_memory_allocated, |
@@ -2141,6 +2137,7 @@ struct proto tcpv6_prot = { | |||
2141 | .obj_size = sizeof(struct tcp6_sock), | 2137 | .obj_size = sizeof(struct tcp6_sock), |
2142 | .twsk_prot = &tcp6_timewait_sock_ops, | 2138 | .twsk_prot = &tcp6_timewait_sock_ops, |
2143 | .rsk_prot = &tcp6_request_sock_ops, | 2139 | .rsk_prot = &tcp6_request_sock_ops, |
2140 | .hashinfo = &tcp_hashinfo, | ||
2144 | #ifdef CONFIG_COMPAT | 2141 | #ifdef CONFIG_COMPAT |
2145 | .compat_setsockopt = compat_tcp_setsockopt, | 2142 | .compat_setsockopt = compat_tcp_setsockopt, |
2146 | .compat_getsockopt = compat_tcp_getsockopt, | 2143 | .compat_getsockopt = compat_tcp_getsockopt, |
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 09c255002e56..45c7c0c3875e 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig | |||
@@ -1,6 +1,5 @@ | |||
1 | config MAC80211 | 1 | config MAC80211 |
2 | tristate "Generic IEEE 802.11 Networking Stack (mac80211)" | 2 | tristate "Generic IEEE 802.11 Networking Stack (mac80211)" |
3 | depends on EXPERIMENTAL | ||
4 | select CRYPTO | 3 | select CRYPTO |
5 | select CRYPTO_ECB | 4 | select CRYPTO_ECB |
6 | select CRYPTO_ARC4 | 5 | select CRYPTO_ARC4 |
@@ -98,6 +97,18 @@ config MAC80211_DEBUGFS | |||
98 | 97 | ||
99 | Say N unless you know you need this. | 98 | Say N unless you know you need this. |
100 | 99 | ||
100 | config MAC80211_DEBUG_PACKET_ALIGNMENT | ||
101 | bool "Enable packet alignment debugging" | ||
102 | depends on MAC80211 | ||
103 | help | ||
104 | This option is recommended for driver authors and strongly | ||
105 | discouraged for everybody else, it will trigger a warning | ||
106 | when a driver hands mac80211 a buffer that is aligned in | ||
107 | a way that will cause problems with the IP stack on some | ||
108 | architectures. | ||
109 | |||
110 | Say N unless you're writing a mac80211 based driver. | ||
111 | |||
101 | config MAC80211_DEBUG | 112 | config MAC80211_DEBUG |
102 | bool "Enable debugging output" | 113 | bool "Enable debugging output" |
103 | depends on MAC80211 | 114 | depends on MAC80211 |
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c index 5dcc2d61551f..67b7c75c430d 100644 --- a/net/mac80211/ieee80211.c +++ b/net/mac80211/ieee80211.c | |||
@@ -1344,17 +1344,17 @@ static int __init ieee80211_init(void) | |||
1344 | 1344 | ||
1345 | ret = rc80211_simple_init(); | 1345 | ret = rc80211_simple_init(); |
1346 | if (ret) | 1346 | if (ret) |
1347 | goto fail; | 1347 | goto out; |
1348 | 1348 | ||
1349 | ret = rc80211_pid_init(); | 1349 | ret = rc80211_pid_init(); |
1350 | if (ret) | 1350 | if (ret) |
1351 | goto fail_simple; | 1351 | goto out_cleanup_simple; |
1352 | 1352 | ||
1353 | ret = ieee80211_wme_register(); | 1353 | ret = ieee80211_wme_register(); |
1354 | if (ret) { | 1354 | if (ret) { |
1355 | printk(KERN_DEBUG "ieee80211_init: failed to " | 1355 | printk(KERN_DEBUG "ieee80211_init: failed to " |
1356 | "initialize WME (err=%d)\n", ret); | 1356 | "initialize WME (err=%d)\n", ret); |
1357 | goto fail_pid; | 1357 | goto out_cleanup_pid; |
1358 | } | 1358 | } |
1359 | 1359 | ||
1360 | ieee80211_debugfs_netdev_init(); | 1360 | ieee80211_debugfs_netdev_init(); |
@@ -1362,11 +1362,11 @@ static int __init ieee80211_init(void) | |||
1362 | 1362 | ||
1363 | return 0; | 1363 | return 0; |
1364 | 1364 | ||
1365 | fail_pid: | 1365 | out_cleanup_pid: |
1366 | rc80211_simple_exit(); | ||
1367 | fail_simple: | ||
1368 | rc80211_pid_exit(); | 1366 | rc80211_pid_exit(); |
1369 | fail: | 1367 | out_cleanup_simple: |
1368 | rc80211_simple_exit(); | ||
1369 | out: | ||
1370 | return ret; | 1370 | return ret; |
1371 | } | 1371 | } |
1372 | 1372 | ||
diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 554c4baed6fb..c339571632b2 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c | |||
@@ -538,7 +538,7 @@ int __init rc80211_pid_init(void) | |||
538 | return ieee80211_rate_control_register(&mac80211_rcpid); | 538 | return ieee80211_rate_control_register(&mac80211_rcpid); |
539 | } | 539 | } |
540 | 540 | ||
541 | void __exit rc80211_pid_exit(void) | 541 | void rc80211_pid_exit(void) |
542 | { | 542 | { |
543 | ieee80211_rate_control_unregister(&mac80211_rcpid); | 543 | ieee80211_rate_control_unregister(&mac80211_rcpid); |
544 | } | 544 | } |
diff --git a/net/mac80211/rc80211_simple.c b/net/mac80211/rc80211_simple.c index 934676d687d6..9a78b116acff 100644 --- a/net/mac80211/rc80211_simple.c +++ b/net/mac80211/rc80211_simple.c | |||
@@ -389,7 +389,7 @@ int __init rc80211_simple_init(void) | |||
389 | return ieee80211_rate_control_register(&mac80211_rcsimple); | 389 | return ieee80211_rate_control_register(&mac80211_rcsimple); |
390 | } | 390 | } |
391 | 391 | ||
392 | void __exit rc80211_simple_exit(void) | 392 | void rc80211_simple_exit(void) |
393 | { | 393 | { |
394 | ieee80211_rate_control_unregister(&mac80211_rcsimple); | 394 | ieee80211_rate_control_unregister(&mac80211_rcsimple); |
395 | } | 395 | } |
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d44c87269bcb..535407d07fa4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c | |||
@@ -340,11 +340,15 @@ static u32 ieee80211_rx_load_stats(struct ieee80211_local *local, | |||
340 | return load; | 340 | return load; |
341 | } | 341 | } |
342 | 342 | ||
343 | #ifdef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT | ||
343 | static ieee80211_txrx_result | 344 | static ieee80211_txrx_result |
344 | ieee80211_rx_h_verify_ip_alignment(struct ieee80211_txrx_data *rx) | 345 | ieee80211_rx_h_verify_ip_alignment(struct ieee80211_txrx_data *rx) |
345 | { | 346 | { |
346 | int hdrlen; | 347 | int hdrlen; |
347 | 348 | ||
349 | if (!WLAN_FC_DATA_PRESENT(rx->fc)) | ||
350 | return TXRX_CONTINUE; | ||
351 | |||
348 | /* | 352 | /* |
349 | * Drivers are required to align the payload data in a way that | 353 | * Drivers are required to align the payload data in a way that |
350 | * guarantees that the contained IP header is aligned to a four- | 354 | * guarantees that the contained IP header is aligned to a four- |
@@ -371,11 +375,14 @@ ieee80211_rx_h_verify_ip_alignment(struct ieee80211_txrx_data *rx) | |||
371 | 375 | ||
372 | return TXRX_CONTINUE; | 376 | return TXRX_CONTINUE; |
373 | } | 377 | } |
378 | #endif | ||
374 | 379 | ||
375 | ieee80211_rx_handler ieee80211_rx_pre_handlers[] = | 380 | ieee80211_rx_handler ieee80211_rx_pre_handlers[] = |
376 | { | 381 | { |
377 | ieee80211_rx_h_parse_qos, | 382 | ieee80211_rx_h_parse_qos, |
383 | #ifdef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT | ||
378 | ieee80211_rx_h_verify_ip_alignment, | 384 | ieee80211_rx_h_verify_ip_alignment, |
385 | #endif | ||
379 | NULL | 386 | NULL |
380 | }; | 387 | }; |
381 | 388 | ||
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index becf91a952ae..c7ad64d664ad 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c | |||
@@ -90,7 +90,7 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1 | |||
90 | * safely. | 90 | * safely. |
91 | * | 91 | * |
92 | */ | 92 | */ |
93 | static void netlbl_cipsov4_doi_free(struct rcu_head *entry) | 93 | void netlbl_cipsov4_doi_free(struct rcu_head *entry) |
94 | { | 94 | { |
95 | struct cipso_v4_doi *ptr; | 95 | struct cipso_v4_doi *ptr; |
96 | 96 | ||
diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index f03cf9b78286..220cb9d06b49 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h | |||
@@ -163,4 +163,7 @@ enum { | |||
163 | /* NetLabel protocol functions */ | 163 | /* NetLabel protocol functions */ |
164 | int netlbl_cipsov4_genl_init(void); | 164 | int netlbl_cipsov4_genl_init(void); |
165 | 165 | ||
166 | /* Free the memory associated with a CIPSOv4 DOI definition */ | ||
167 | void netlbl_cipsov4_doi_free(struct rcu_head *entry); | ||
168 | |||
166 | #endif | 169 | #endif |
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 3689956c3436..8220990ceb96 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h | |||
@@ -61,6 +61,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, | |||
61 | struct netlbl_audit *audit_info); | 61 | struct netlbl_audit *audit_info); |
62 | int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, | 62 | int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, |
63 | struct netlbl_audit *audit_info); | 63 | struct netlbl_audit *audit_info); |
64 | int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); | ||
64 | int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); | 65 | int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); |
65 | struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); | 66 | struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); |
66 | int netlbl_domhsh_walk(u32 *skip_bkt, | 67 | int netlbl_domhsh_walk(u32 *skip_bkt, |
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index c69e3e1f05c3..39793a1a93aa 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c | |||
@@ -30,6 +30,7 @@ | |||
30 | 30 | ||
31 | #include <linux/init.h> | 31 | #include <linux/init.h> |
32 | #include <linux/types.h> | 32 | #include <linux/types.h> |
33 | #include <linux/audit.h> | ||
33 | #include <net/ip.h> | 34 | #include <net/ip.h> |
34 | #include <net/netlabel.h> | 35 | #include <net/netlabel.h> |
35 | #include <net/cipso_ipv4.h> | 36 | #include <net/cipso_ipv4.h> |
@@ -38,10 +39,186 @@ | |||
38 | 39 | ||
39 | #include "netlabel_domainhash.h" | 40 | #include "netlabel_domainhash.h" |
40 | #include "netlabel_unlabeled.h" | 41 | #include "netlabel_unlabeled.h" |
42 | #include "netlabel_cipso_v4.h" | ||
41 | #include "netlabel_user.h" | 43 | #include "netlabel_user.h" |
42 | #include "netlabel_mgmt.h" | 44 | #include "netlabel_mgmt.h" |
43 | 45 | ||
44 | /* | 46 | /* |
47 | * Configuration Functions | ||
48 | */ | ||
49 | |||
50 | /** | ||
51 | * netlbl_cfg_map_del - Remove a NetLabel/LSM domain mapping | ||
52 | * @domain: the domain mapping to remove | ||
53 | * @audit_info: NetLabel audit information | ||
54 | * | ||
55 | * Description: | ||
56 | * Removes a NetLabel/LSM domain mapping. A @domain value of NULL causes the | ||
57 | * default domain mapping to be removed. Returns zero on success, negative | ||
58 | * values on failure. | ||
59 | * | ||
60 | */ | ||
61 | int netlbl_cfg_map_del(const char *domain, struct netlbl_audit *audit_info) | ||
62 | { | ||
63 | return netlbl_domhsh_remove(domain, audit_info); | ||
64 | } | ||
65 | |||
66 | /** | ||
67 | * netlbl_cfg_unlbl_add_map - Add an unlabeled NetLabel/LSM domain mapping | ||
68 | * @domain: the domain mapping to add | ||
69 | * @audit_info: NetLabel audit information | ||
70 | * | ||
71 | * Description: | ||
72 | * Adds a new unlabeled NetLabel/LSM domain mapping. A @domain value of NULL | ||
73 | * causes a new default domain mapping to be added. Returns zero on success, | ||
74 | * negative values on failure. | ||
75 | * | ||
76 | */ | ||
77 | int netlbl_cfg_unlbl_add_map(const char *domain, | ||
78 | struct netlbl_audit *audit_info) | ||
79 | { | ||
80 | int ret_val = -ENOMEM; | ||
81 | struct netlbl_dom_map *entry; | ||
82 | |||
83 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); | ||
84 | if (entry == NULL) | ||
85 | goto cfg_unlbl_add_map_failure; | ||
86 | if (domain != NULL) { | ||
87 | entry->domain = kstrdup(domain, GFP_ATOMIC); | ||
88 | if (entry->domain == NULL) | ||
89 | goto cfg_unlbl_add_map_failure; | ||
90 | } | ||
91 | entry->type = NETLBL_NLTYPE_UNLABELED; | ||
92 | |||
93 | ret_val = netlbl_domhsh_add(entry, audit_info); | ||
94 | if (ret_val != 0) | ||
95 | goto cfg_unlbl_add_map_failure; | ||
96 | |||
97 | return 0; | ||
98 | |||
99 | cfg_unlbl_add_map_failure: | ||
100 | if (entry != NULL) | ||
101 | kfree(entry->domain); | ||
102 | kfree(entry); | ||
103 | return ret_val; | ||
104 | } | ||
105 | |||
106 | /** | ||
107 | * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition | ||
108 | * @doi_def: the DOI definition | ||
109 | * @audit_info: NetLabel audit information | ||
110 | * | ||
111 | * Description: | ||
112 | * Add a new CIPSOv4 DOI definition to the NetLabel subsystem. Returns zero on | ||
113 | * success, negative values on failure. | ||
114 | * | ||
115 | */ | ||
116 | int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, | ||
117 | struct netlbl_audit *audit_info) | ||
118 | { | ||
119 | int ret_val; | ||
120 | const char *type_str; | ||
121 | struct audit_buffer *audit_buf; | ||
122 | |||
123 | ret_val = cipso_v4_doi_add(doi_def); | ||
124 | |||
125 | audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, | ||
126 | audit_info); | ||
127 | if (audit_buf != NULL) { | ||
128 | switch (doi_def->type) { | ||
129 | case CIPSO_V4_MAP_STD: | ||
130 | type_str = "std"; | ||
131 | break; | ||
132 | case CIPSO_V4_MAP_PASS: | ||
133 | type_str = "pass"; | ||
134 | break; | ||
135 | default: | ||
136 | type_str = "(unknown)"; | ||
137 | } | ||
138 | audit_log_format(audit_buf, | ||
139 | " cipso_doi=%u cipso_type=%s res=%u", | ||
140 | doi_def->doi, | ||
141 | type_str, | ||
142 | ret_val == 0 ? 1 : 0); | ||
143 | audit_log_end(audit_buf); | ||
144 | } | ||
145 | |||
146 | return ret_val; | ||
147 | } | ||
148 | |||
149 | /** | ||
150 | * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping | ||
151 | * @doi_def: the DOI definition | ||
152 | * @domain: the domain mapping to add | ||
153 | * @audit_info: NetLabel audit information | ||
154 | * | ||
155 | * Description: | ||
156 | * Add a new CIPSOv4 DOI definition and NetLabel/LSM domain mapping for this | ||
157 | * new DOI definition to the NetLabel subsystem. A @domain value of NULL adds | ||
158 | * a new default domain mapping. Returns zero on success, negative values on | ||
159 | * failure. | ||
160 | * | ||
161 | */ | ||
162 | int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, | ||
163 | const char *domain, | ||
164 | struct netlbl_audit *audit_info) | ||
165 | { | ||
166 | int ret_val = -ENOMEM; | ||
167 | struct netlbl_dom_map *entry; | ||
168 | |||
169 | entry = kzalloc(sizeof(*entry), GFP_ATOMIC); | ||
170 | if (entry == NULL) | ||
171 | goto cfg_cipsov4_add_map_failure; | ||
172 | if (domain != NULL) { | ||
173 | entry->domain = kstrdup(domain, GFP_ATOMIC); | ||
174 | if (entry->domain == NULL) | ||
175 | goto cfg_cipsov4_add_map_failure; | ||
176 | } | ||
177 | entry->type = NETLBL_NLTYPE_CIPSOV4; | ||
178 | entry->type_def.cipsov4 = doi_def; | ||
179 | |||
180 | /* Grab a RCU read lock here so nothing happens to the doi_def variable | ||
181 | * between adding it to the CIPSOv4 protocol engine and adding a | ||
182 | * domain mapping for it. */ | ||
183 | |||
184 | rcu_read_lock(); | ||
185 | ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info); | ||
186 | if (ret_val != 0) | ||
187 | goto cfg_cipsov4_add_map_failure_unlock; | ||
188 | ret_val = netlbl_domhsh_add(entry, audit_info); | ||
189 | if (ret_val != 0) | ||
190 | goto cfg_cipsov4_add_map_failure_remove_doi; | ||
191 | rcu_read_unlock(); | ||
192 | |||
193 | return 0; | ||
194 | |||
195 | cfg_cipsov4_add_map_failure_remove_doi: | ||
196 | cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free); | ||
197 | cfg_cipsov4_add_map_failure_unlock: | ||
198 | rcu_read_unlock(); | ||
199 | cfg_cipsov4_add_map_failure: | ||
200 | if (entry != NULL) | ||
201 | kfree(entry->domain); | ||
202 | kfree(entry); | ||
203 | return ret_val; | ||
204 | } | ||
205 | |||
206 | /** | ||
207 | * netlbl_cfg_cipsov4_del - Remove an existing CIPSOv4 DOI definition | ||
208 | * @doi: the CIPSO DOI value | ||
209 | * @audit_info: NetLabel audit information | ||
210 | * | ||
211 | * Description: | ||
212 | * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem. | ||
213 | * Returns zero on success, negative values on failure. | ||
214 | * | ||
215 | */ | ||
216 | int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info) | ||
217 | { | ||
218 | return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free); | ||
219 | } | ||
220 | |||
221 | /* | ||
45 | * Security Attribute Functions | 222 | * Security Attribute Functions |
46 | */ | 223 | */ |
47 | 224 | ||
diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 6562f868e82f..1a47f5d1be17 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c | |||
@@ -340,7 +340,7 @@ EXPORT_SYMBOL(rfkill_allocate); | |||
340 | * rfkill_free - Mark rfkill structure for deletion | 340 | * rfkill_free - Mark rfkill structure for deletion |
341 | * @rfkill: rfkill structure to be destroyed | 341 | * @rfkill: rfkill structure to be destroyed |
342 | * | 342 | * |
343 | * Decrements reference count of rfkill structure so it is destoryed. | 343 | * Decrements reference count of rfkill structure so it is destroyed. |
344 | * Note that rfkill_free() should _not_ be called after rfkill_unregister(). | 344 | * Note that rfkill_free() should _not_ be called after rfkill_unregister(). |
345 | */ | 345 | */ |
346 | void rfkill_free(struct rfkill *rfkill) | 346 | void rfkill_free(struct rfkill *rfkill) |
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 5a7f6a3060fc..971b867e0484 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/in.h> | 19 | #include <linux/in.h> |
20 | #include <linux/ip.h> | 20 | #include <linux/ip.h> |
21 | #include <linux/ipv6.h> | 21 | #include <linux/ipv6.h> |
22 | #include <linux/if_vlan.h> | ||
22 | 23 | ||
23 | #include <net/pkt_cls.h> | 24 | #include <net/pkt_cls.h> |
24 | #include <net/ip.h> | 25 | #include <net/ip.h> |
@@ -270,6 +271,15 @@ static u32 flow_get_skgid(const struct sk_buff *skb) | |||
270 | return 0; | 271 | return 0; |
271 | } | 272 | } |
272 | 273 | ||
274 | static u32 flow_get_vlan_tag(const struct sk_buff *skb) | ||
275 | { | ||
276 | u16 uninitialized_var(tag); | ||
277 | |||
278 | if (vlan_get_tag(skb, &tag) < 0) | ||
279 | return 0; | ||
280 | return tag & VLAN_VID_MASK; | ||
281 | } | ||
282 | |||
273 | static u32 flow_key_get(const struct sk_buff *skb, int key) | 283 | static u32 flow_key_get(const struct sk_buff *skb, int key) |
274 | { | 284 | { |
275 | switch (key) { | 285 | switch (key) { |
@@ -305,6 +315,8 @@ static u32 flow_key_get(const struct sk_buff *skb, int key) | |||
305 | return flow_get_skuid(skb); | 315 | return flow_get_skuid(skb); |
306 | case FLOW_KEY_SKGID: | 316 | case FLOW_KEY_SKGID: |
307 | return flow_get_skgid(skb); | 317 | return flow_get_skgid(skb); |
318 | case FLOW_KEY_VLAN_TAG: | ||
319 | return flow_get_vlan_tag(skb); | ||
308 | default: | 320 | default: |
309 | WARN_ON(1); | 321 | WARN_ON(1); |
310 | return 0; | 322 | return 0; |
@@ -402,12 +414,13 @@ static int flow_change(struct tcf_proto *tp, unsigned long base, | |||
402 | 414 | ||
403 | if (tb[TCA_FLOW_KEYS]) { | 415 | if (tb[TCA_FLOW_KEYS]) { |
404 | keymask = nla_get_u32(tb[TCA_FLOW_KEYS]); | 416 | keymask = nla_get_u32(tb[TCA_FLOW_KEYS]); |
405 | if (fls(keymask) - 1 > FLOW_KEY_MAX) | ||
406 | return -EOPNOTSUPP; | ||
407 | 417 | ||
408 | nkeys = hweight32(keymask); | 418 | nkeys = hweight32(keymask); |
409 | if (nkeys == 0) | 419 | if (nkeys == 0) |
410 | return -EINVAL; | 420 | return -EINVAL; |
421 | |||
422 | if (fls(keymask) - 1 > FLOW_KEY_MAX) | ||
423 | return -EOPNOTSUPP; | ||
411 | } | 424 | } |
412 | 425 | ||
413 | err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); | 426 | err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &flow_ext_map); |
@@ -594,11 +607,11 @@ static int flow_dump(struct tcf_proto *tp, unsigned long fh, | |||
594 | 607 | ||
595 | if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) | 608 | if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0) |
596 | goto nla_put_failure; | 609 | goto nla_put_failure; |
597 | 610 | #ifdef CONFIG_NET_EMATCH | |
598 | if (f->ematches.hdr.nmatches && | 611 | if (f->ematches.hdr.nmatches && |
599 | tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0) | 612 | tcf_em_tree_dump(skb, &f->ematches, TCA_FLOW_EMATCHES) < 0) |
600 | goto nla_put_failure; | 613 | goto nla_put_failure; |
601 | 614 | #endif | |
602 | nla_nest_end(skb, nest); | 615 | nla_nest_end(skb, nest); |
603 | 616 | ||
604 | if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0) | 617 | if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0) |
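
Besides the new FLOW_KEY_VLAN_TAG key, the flow_change() hunk reorders the keymask checks so that an empty mask is rejected as -EINVAL before the highest requested key is compared against FLOW_KEY_MAX (-EOPNOTSUPP). A small sketch of that validation order, with a hypothetical max_key parameter (hweight32() and fls() are the same bitops used above):

	static int example_validate_keymask(u32 keymask, int max_key)
	{
		if (hweight32(keymask) == 0)		/* no keys requested at all */
			return -EINVAL;
		if (fls(keymask) - 1 > max_key)		/* highest set bit is a key we don't know */
			return -EOPNOTSUPP;
		return 0;
	}
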
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index a1e5619b1876..2a7e648fbcf4 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c | |||
@@ -65,6 +65,7 @@ | |||
65 | #include <linux/string.h> | 65 | #include <linux/string.h> |
66 | #include <linux/skbuff.h> | 66 | #include <linux/skbuff.h> |
67 | #include <linux/random.h> | 67 | #include <linux/random.h> |
68 | #include <linux/if_vlan.h> | ||
68 | #include <linux/tc_ematch/tc_em_meta.h> | 69 | #include <linux/tc_ematch/tc_em_meta.h> |
69 | #include <net/dst.h> | 70 | #include <net/dst.h> |
70 | #include <net/route.h> | 71 | #include <net/route.h> |
@@ -170,6 +171,21 @@ META_COLLECTOR(var_dev) | |||
170 | } | 171 | } |
171 | 172 | ||
172 | /************************************************************************** | 173 | /************************************************************************** |
174 | * vlan tag | ||
175 | **************************************************************************/ | ||
176 | |||
177 | META_COLLECTOR(int_vlan_tag) | ||
178 | { | ||
179 | unsigned short uninitialized_var(tag); | ||
180 | if (vlan_get_tag(skb, &tag) < 0) | ||
181 | *err = -1; | ||
182 | else | ||
183 | dst->value = tag; | ||
184 | } | ||
185 | |||
186 | |||
187 | |||
188 | /************************************************************************** | ||
173 | * skb attributes | 189 | * skb attributes |
174 | **************************************************************************/ | 190 | **************************************************************************/ |
175 | 191 | ||
@@ -520,6 +536,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { | |||
520 | [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo), | 536 | [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo), |
521 | [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), | 537 | [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), |
522 | [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), | 538 | [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), |
539 | [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), | ||
523 | } | 540 | } |
524 | }; | 541 | }; |
525 | 542 | ||
diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 97e6ebd14500..ae367c82e512 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c | |||
@@ -420,15 +420,15 @@ struct sctp_shared_key *sctp_auth_get_shkey( | |||
420 | const struct sctp_association *asoc, | 420 | const struct sctp_association *asoc, |
421 | __u16 key_id) | 421 | __u16 key_id) |
422 | { | 422 | { |
423 | struct sctp_shared_key *key = NULL; | 423 | struct sctp_shared_key *key; |
424 | 424 | ||
425 | /* First search associations set of endpoint pair shared keys */ | 425 | /* First search associations set of endpoint pair shared keys */ |
426 | key_for_each(key, &asoc->endpoint_shared_keys) { | 426 | key_for_each(key, &asoc->endpoint_shared_keys) { |
427 | if (key->key_id == key_id) | 427 | if (key->key_id == key_id) |
428 | break; | 428 | return key; |
429 | } | 429 | } |
430 | 430 | ||
431 | return key; | 431 | return NULL; |
432 | } | 432 | } |
433 | 433 | ||
434 | /* | 434 | /* |
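
The sctp_auth_get_shkey() change returns the key from inside the loop and NULL afterwards instead of breaking and returning the cursor. Assuming key_for_each() behaves like list_for_each_entry(), the cursor no longer points at a valid entry once the loop runs to completion, so the early-return form is the safe one. A generic sketch of the pattern with hypothetical types:

	struct example_item {
		struct list_head list;
		int id;
	};

	static struct example_item *example_find(struct list_head *head, int id)
	{
		struct example_item *it;

		list_for_each_entry(it, head, list) {
			if (it->id == id)
				return it;	/* return while the cursor is a real entry */
		}
		return NULL;			/* after the loop the cursor is not a valid item */
	}
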
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 61cbd5a8dd0c..f98658782d4f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c | |||
@@ -537,7 +537,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(const struct sctp_endpoint *ep, | |||
537 | * | 537 | * |
538 | * This means that if we only want to abort associations | 538 | * This means that if we only want to abort associations |
539 | * in an authenticated way (i.e AUTH+ABORT), then we | 539 | * in an authenticated way (i.e AUTH+ABORT), then we |
540 | * can't destory this association just becuase the packet | 540 | * can't destroy this association just becuase the packet |
541 | * was malformed. | 541 | * was malformed. |
542 | */ | 542 | */ |
543 | if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) | 543 | if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) |
@@ -3865,6 +3865,10 @@ sctp_disposition_t sctp_sf_eat_auth(const struct sctp_endpoint *ep, | |||
3865 | struct sctp_chunk *err_chunk; | 3865 | struct sctp_chunk *err_chunk; |
3866 | sctp_ierror_t error; | 3866 | sctp_ierror_t error; |
3867 | 3867 | ||
3868 | /* Make sure that the peer has AUTH capable */ | ||
3869 | if (!asoc->peer.auth_capable) | ||
3870 | return sctp_sf_unk_chunk(ep, asoc, type, arg, commands); | ||
3871 | |||
3868 | if (!sctp_vtag_verify(chunk, asoc)) { | 3872 | if (!sctp_vtag_verify(chunk, asoc)) { |
3869 | sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, | 3873 | sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, |
3870 | SCTP_NULL()); | 3874 | SCTP_NULL()); |
@@ -4130,7 +4134,7 @@ static sctp_disposition_t sctp_sf_abort_violation( | |||
4130 | * | 4134 | * |
4131 | * This means that if we only want to abort associations | 4135 | * This means that if we only want to abort associations |
4132 | * in an authenticated way (i.e AUTH+ABORT), then we | 4136 | * in an authenticated way (i.e AUTH+ABORT), then we |
4133 | * can't destory this association just becuase the packet | 4137 | * can't destroy this association just becuase the packet |
4134 | * was malformed. | 4138 | * was malformed. |
4135 | */ | 4139 | */ |
4136 | if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) | 4140 | if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) |
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 5c69a725e530..92e1dbe50947 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
@@ -11,6 +11,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ | |||
11 | auth.o auth_null.o auth_unix.o \ | 11 | auth.o auth_null.o auth_unix.o \ |
12 | svc.o svcsock.o svcauth.o svcauth_unix.o \ | 12 | svc.o svcsock.o svcauth.o svcauth_unix.o \ |
13 | rpcb_clnt.o timer.o xdr.o \ | 13 | rpcb_clnt.o timer.o xdr.o \ |
14 | sunrpc_syms.o cache.o rpc_pipe.o | 14 | sunrpc_syms.o cache.o rpc_pipe.o \ |
15 | svc_xprt.o | ||
15 | sunrpc-$(CONFIG_PROC_FS) += stats.o | 16 | sunrpc-$(CONFIG_PROC_FS) += stats.o |
16 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o | 17 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o |
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73940df6c460..481f984e9a22 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
@@ -224,38 +224,34 @@ static int rsi_parse(struct cache_detail *cd, | |||
224 | 224 | ||
225 | /* major/minor */ | 225 | /* major/minor */ |
226 | len = qword_get(&mesg, buf, mlen); | 226 | len = qword_get(&mesg, buf, mlen); |
227 | if (len < 0) | 227 | if (len <= 0) |
228 | goto out; | 228 | goto out; |
229 | if (len == 0) { | 229 | rsii.major_status = simple_strtoul(buf, &ep, 10); |
230 | if (*ep) | ||
231 | goto out; | ||
232 | len = qword_get(&mesg, buf, mlen); | ||
233 | if (len <= 0) | ||
234 | goto out; | ||
235 | rsii.minor_status = simple_strtoul(buf, &ep, 10); | ||
236 | if (*ep) | ||
230 | goto out; | 237 | goto out; |
231 | } else { | ||
232 | rsii.major_status = simple_strtoul(buf, &ep, 10); | ||
233 | if (*ep) | ||
234 | goto out; | ||
235 | len = qword_get(&mesg, buf, mlen); | ||
236 | if (len <= 0) | ||
237 | goto out; | ||
238 | rsii.minor_status = simple_strtoul(buf, &ep, 10); | ||
239 | if (*ep) | ||
240 | goto out; | ||
241 | 238 | ||
242 | /* out_handle */ | 239 | /* out_handle */ |
243 | len = qword_get(&mesg, buf, mlen); | 240 | len = qword_get(&mesg, buf, mlen); |
244 | if (len < 0) | 241 | if (len < 0) |
245 | goto out; | 242 | goto out; |
246 | status = -ENOMEM; | 243 | status = -ENOMEM; |
247 | if (dup_to_netobj(&rsii.out_handle, buf, len)) | 244 | if (dup_to_netobj(&rsii.out_handle, buf, len)) |
248 | goto out; | 245 | goto out; |
249 | 246 | ||
250 | /* out_token */ | 247 | /* out_token */ |
251 | len = qword_get(&mesg, buf, mlen); | 248 | len = qword_get(&mesg, buf, mlen); |
252 | status = -EINVAL; | 249 | status = -EINVAL; |
253 | if (len < 0) | 250 | if (len < 0) |
254 | goto out; | 251 | goto out; |
255 | status = -ENOMEM; | 252 | status = -ENOMEM; |
256 | if (dup_to_netobj(&rsii.out_token, buf, len)) | 253 | if (dup_to_netobj(&rsii.out_token, buf, len)) |
257 | goto out; | 254 | goto out; |
258 | } | ||
259 | rsii.h.expiry_time = expiry; | 255 | rsii.h.expiry_time = expiry; |
260 | rsip = rsi_update(&rsii, rsip); | 256 | rsip = rsi_update(&rsii, rsip); |
261 | status = 0; | 257 | status = 0; |
@@ -975,6 +971,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, | |||
975 | struct kvec *resv = &rqstp->rq_res.head[0]; | 971 | struct kvec *resv = &rqstp->rq_res.head[0]; |
976 | struct xdr_netobj tmpobj; | 972 | struct xdr_netobj tmpobj; |
977 | struct rsi *rsip, rsikey; | 973 | struct rsi *rsip, rsikey; |
974 | int ret; | ||
978 | 975 | ||
979 | /* Read the verifier; should be NULL: */ | 976 | /* Read the verifier; should be NULL: */ |
980 | *authp = rpc_autherr_badverf; | 977 | *authp = rpc_autherr_badverf; |
@@ -1014,23 +1011,27 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, | |||
1014 | /* No upcall result: */ | 1011 | /* No upcall result: */ |
1015 | return SVC_DROP; | 1012 | return SVC_DROP; |
1016 | case 0: | 1013 | case 0: |
1014 | ret = SVC_DROP; | ||
1017 | /* Got an answer to the upcall; use it: */ | 1015 | /* Got an answer to the upcall; use it: */ |
1018 | if (gss_write_init_verf(rqstp, rsip)) | 1016 | if (gss_write_init_verf(rqstp, rsip)) |
1019 | return SVC_DROP; | 1017 | goto out; |
1020 | if (resv->iov_len + 4 > PAGE_SIZE) | 1018 | if (resv->iov_len + 4 > PAGE_SIZE) |
1021 | return SVC_DROP; | 1019 | goto out; |
1022 | svc_putnl(resv, RPC_SUCCESS); | 1020 | svc_putnl(resv, RPC_SUCCESS); |
1023 | if (svc_safe_putnetobj(resv, &rsip->out_handle)) | 1021 | if (svc_safe_putnetobj(resv, &rsip->out_handle)) |
1024 | return SVC_DROP; | 1022 | goto out; |
1025 | if (resv->iov_len + 3 * 4 > PAGE_SIZE) | 1023 | if (resv->iov_len + 3 * 4 > PAGE_SIZE) |
1026 | return SVC_DROP; | 1024 | goto out; |
1027 | svc_putnl(resv, rsip->major_status); | 1025 | svc_putnl(resv, rsip->major_status); |
1028 | svc_putnl(resv, rsip->minor_status); | 1026 | svc_putnl(resv, rsip->minor_status); |
1029 | svc_putnl(resv, GSS_SEQ_WIN); | 1027 | svc_putnl(resv, GSS_SEQ_WIN); |
1030 | if (svc_safe_putnetobj(resv, &rsip->out_token)) | 1028 | if (svc_safe_putnetobj(resv, &rsip->out_token)) |
1031 | return SVC_DROP; | 1029 | goto out; |
1032 | } | 1030 | } |
1033 | return SVC_COMPLETE; | 1031 | ret = SVC_COMPLETE; |
1032 | out: | ||
1033 | cache_put(&rsip->h, &rsi_cache); | ||
1034 | return ret; | ||
1034 | } | 1035 | } |
1035 | 1036 | ||
1036 | /* | 1037 | /* |
@@ -1125,6 +1126,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
1125 | case RPC_GSS_PROC_DESTROY: | 1126 | case RPC_GSS_PROC_DESTROY: |
1126 | if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) | 1127 | if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) |
1127 | goto auth_err; | 1128 | goto auth_err; |
1129 | rsci->h.expiry_time = get_seconds(); | ||
1128 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); | 1130 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); |
1129 | if (resv->iov_len + 4 > PAGE_SIZE) | 1131 | if (resv->iov_len + 4 > PAGE_SIZE) |
1130 | goto drop; | 1132 | goto drop; |
@@ -1386,19 +1388,26 @@ int | |||
1386 | gss_svc_init(void) | 1388 | gss_svc_init(void) |
1387 | { | 1389 | { |
1388 | int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); | 1390 | int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); |
1389 | if (rv == 0) { | 1391 | if (rv) |
1390 | cache_register(&rsc_cache); | 1392 | return rv; |
1391 | cache_register(&rsi_cache); | 1393 | rv = cache_register(&rsc_cache); |
1392 | } | 1394 | if (rv) |
1395 | goto out1; | ||
1396 | rv = cache_register(&rsi_cache); | ||
1397 | if (rv) | ||
1398 | goto out2; | ||
1399 | return 0; | ||
1400 | out2: | ||
1401 | cache_unregister(&rsc_cache); | ||
1402 | out1: | ||
1403 | svc_auth_unregister(RPC_AUTH_GSS); | ||
1393 | return rv; | 1404 | return rv; |
1394 | } | 1405 | } |
1395 | 1406 | ||
1396 | void | 1407 | void |
1397 | gss_svc_shutdown(void) | 1408 | gss_svc_shutdown(void) |
1398 | { | 1409 | { |
1399 | if (cache_unregister(&rsc_cache)) | 1410 | cache_unregister(&rsc_cache); |
1400 | printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n"); | 1411 | cache_unregister(&rsi_cache); |
1401 | if (cache_unregister(&rsi_cache)) | ||
1402 | printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n"); | ||
1403 | svc_auth_unregister(RPC_AUTH_GSS); | 1412 | svc_auth_unregister(RPC_AUTH_GSS); |
1404 | } | 1413 | } |
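
gss_svc_init() now checks every registration step and unwinds the earlier ones in reverse order on failure, which is what allows cache_register() to start returning an error (see the cache.c hunk below). A generic sketch of that goto-unwind shape, with hypothetical register_a/b/c helpers:

	int register_a(void); int register_b(void); int register_c(void);
	void unregister_a(void); void unregister_b(void);

	static int example_init(void)
	{
		int rv;

		rv = register_a();
		if (rv)
			return rv;
		rv = register_b();
		if (rv)
			goto out_a;
		rv = register_c();
		if (rv)
			goto out_b;
		return 0;
	out_b:
		unregister_b();		/* undo in reverse order of registration */
	out_a:
		unregister_a();
		return rv;
	}
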
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 73f053d0cc7a..636c8e04e0be 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c | |||
@@ -245,6 +245,7 @@ int cache_check(struct cache_detail *detail, | |||
245 | cache_put(h, detail); | 245 | cache_put(h, detail); |
246 | return rv; | 246 | return rv; |
247 | } | 247 | } |
248 | EXPORT_SYMBOL(cache_check); | ||
248 | 249 | ||
249 | /* | 250 | /* |
250 | * caches need to be periodically cleaned. | 251 | * caches need to be periodically cleaned. |
@@ -290,44 +291,78 @@ static const struct file_operations cache_flush_operations; | |||
290 | static void do_cache_clean(struct work_struct *work); | 291 | static void do_cache_clean(struct work_struct *work); |
291 | static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); | 292 | static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); |
292 | 293 | ||
293 | void cache_register(struct cache_detail *cd) | 294 | static void remove_cache_proc_entries(struct cache_detail *cd) |
294 | { | 295 | { |
295 | cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); | 296 | if (cd->proc_ent == NULL) |
296 | if (cd->proc_ent) { | 297 | return; |
297 | struct proc_dir_entry *p; | 298 | if (cd->flush_ent) |
298 | cd->proc_ent->owner = cd->owner; | 299 | remove_proc_entry("flush", cd->proc_ent); |
299 | cd->channel_ent = cd->content_ent = NULL; | 300 | if (cd->channel_ent) |
301 | remove_proc_entry("channel", cd->proc_ent); | ||
302 | if (cd->content_ent) | ||
303 | remove_proc_entry("content", cd->proc_ent); | ||
304 | cd->proc_ent = NULL; | ||
305 | remove_proc_entry(cd->name, proc_net_rpc); | ||
306 | } | ||
300 | 307 | ||
301 | p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, | 308 | #ifdef CONFIG_PROC_FS |
302 | cd->proc_ent); | 309 | static int create_cache_proc_entries(struct cache_detail *cd) |
303 | cd->flush_ent = p; | 310 | { |
304 | if (p) { | 311 | struct proc_dir_entry *p; |
305 | p->proc_fops = &cache_flush_operations; | ||
306 | p->owner = cd->owner; | ||
307 | p->data = cd; | ||
308 | } | ||
309 | 312 | ||
310 | if (cd->cache_request || cd->cache_parse) { | 313 | cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); |
311 | p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, | 314 | if (cd->proc_ent == NULL) |
312 | cd->proc_ent); | 315 | goto out_nomem; |
313 | cd->channel_ent = p; | 316 | cd->proc_ent->owner = cd->owner; |
314 | if (p) { | 317 | cd->channel_ent = cd->content_ent = NULL; |
315 | p->proc_fops = &cache_file_operations; | 318 | |
316 | p->owner = cd->owner; | 319 | p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent); |
317 | p->data = cd; | 320 | cd->flush_ent = p; |
318 | } | 321 | if (p == NULL) |
319 | } | 322 | goto out_nomem; |
320 | if (cd->cache_show) { | 323 | p->proc_fops = &cache_flush_operations; |
321 | p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR, | 324 | p->owner = cd->owner; |
322 | cd->proc_ent); | 325 | p->data = cd; |
323 | cd->content_ent = p; | 326 | |
324 | if (p) { | 327 | if (cd->cache_request || cd->cache_parse) { |
325 | p->proc_fops = &content_file_operations; | 328 | p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, |
326 | p->owner = cd->owner; | 329 | cd->proc_ent); |
327 | p->data = cd; | 330 | cd->channel_ent = p; |
328 | } | 331 | if (p == NULL) |
329 | } | 332 | goto out_nomem; |
333 | p->proc_fops = &cache_file_operations; | ||
334 | p->owner = cd->owner; | ||
335 | p->data = cd; | ||
330 | } | 336 | } |
337 | if (cd->cache_show) { | ||
338 | p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR, | ||
339 | cd->proc_ent); | ||
340 | cd->content_ent = p; | ||
341 | if (p == NULL) | ||
342 | goto out_nomem; | ||
343 | p->proc_fops = &content_file_operations; | ||
344 | p->owner = cd->owner; | ||
345 | p->data = cd; | ||
346 | } | ||
347 | return 0; | ||
348 | out_nomem: | ||
349 | remove_cache_proc_entries(cd); | ||
350 | return -ENOMEM; | ||
351 | } | ||
352 | #else /* CONFIG_PROC_FS */ | ||
353 | static int create_cache_proc_entries(struct cache_detail *cd) | ||
354 | { | ||
355 | return 0; | ||
356 | } | ||
357 | #endif | ||
358 | |||
359 | int cache_register(struct cache_detail *cd) | ||
360 | { | ||
361 | int ret; | ||
362 | |||
363 | ret = create_cache_proc_entries(cd); | ||
364 | if (ret) | ||
365 | return ret; | ||
331 | rwlock_init(&cd->hash_lock); | 366 | rwlock_init(&cd->hash_lock); |
332 | INIT_LIST_HEAD(&cd->queue); | 367 | INIT_LIST_HEAD(&cd->queue); |
333 | spin_lock(&cache_list_lock); | 368 | spin_lock(&cache_list_lock); |
@@ -341,9 +376,11 @@ void cache_register(struct cache_detail *cd) | |||
341 | 376 | ||
342 | /* start the cleaning process */ | 377 | /* start the cleaning process */ |
343 | schedule_delayed_work(&cache_cleaner, 0); | 378 | schedule_delayed_work(&cache_cleaner, 0); |
379 | return 0; | ||
344 | } | 380 | } |
381 | EXPORT_SYMBOL(cache_register); | ||
345 | 382 | ||
346 | int cache_unregister(struct cache_detail *cd) | 383 | void cache_unregister(struct cache_detail *cd) |
347 | { | 384 | { |
348 | cache_purge(cd); | 385 | cache_purge(cd); |
349 | spin_lock(&cache_list_lock); | 386 | spin_lock(&cache_list_lock); |
@@ -351,30 +388,23 @@ int cache_unregister(struct cache_detail *cd) | |||
351 | if (cd->entries || atomic_read(&cd->inuse)) { | 388 | if (cd->entries || atomic_read(&cd->inuse)) { |
352 | write_unlock(&cd->hash_lock); | 389 | write_unlock(&cd->hash_lock); |
353 | spin_unlock(&cache_list_lock); | 390 | spin_unlock(&cache_list_lock); |
354 | return -EBUSY; | 391 | goto out; |
355 | } | 392 | } |
356 | if (current_detail == cd) | 393 | if (current_detail == cd) |
357 | current_detail = NULL; | 394 | current_detail = NULL; |
358 | list_del_init(&cd->others); | 395 | list_del_init(&cd->others); |
359 | write_unlock(&cd->hash_lock); | 396 | write_unlock(&cd->hash_lock); |
360 | spin_unlock(&cache_list_lock); | 397 | spin_unlock(&cache_list_lock); |
361 | if (cd->proc_ent) { | 398 | remove_cache_proc_entries(cd); |
362 | if (cd->flush_ent) | ||
363 | remove_proc_entry("flush", cd->proc_ent); | ||
364 | if (cd->channel_ent) | ||
365 | remove_proc_entry("channel", cd->proc_ent); | ||
366 | if (cd->content_ent) | ||
367 | remove_proc_entry("content", cd->proc_ent); | ||
368 | |||
369 | cd->proc_ent = NULL; | ||
370 | remove_proc_entry(cd->name, proc_net_rpc); | ||
371 | } | ||
372 | if (list_empty(&cache_list)) { | 399 | if (list_empty(&cache_list)) { |
373 | /* module must be being unloaded so its safe to kill the worker */ | 400 | /* module must be being unloaded so its safe to kill the worker */ |
374 | cancel_delayed_work_sync(&cache_cleaner); | 401 | cancel_delayed_work_sync(&cache_cleaner); |
375 | } | 402 | } |
376 | return 0; | 403 | return; |
404 | out: | ||
405 | printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); | ||
377 | } | 406 | } |
407 | EXPORT_SYMBOL(cache_unregister); | ||
378 | 408 | ||
379 | /* clean cache tries to find something to clean | 409 | /* clean cache tries to find something to clean |
380 | * and cleans it. | 410 | * and cleans it. |
@@ -489,6 +519,7 @@ void cache_flush(void) | |||
489 | while (cache_clean() != -1) | 519 | while (cache_clean() != -1) |
490 | cond_resched(); | 520 | cond_resched(); |
491 | } | 521 | } |
522 | EXPORT_SYMBOL(cache_flush); | ||
492 | 523 | ||
493 | void cache_purge(struct cache_detail *detail) | 524 | void cache_purge(struct cache_detail *detail) |
494 | { | 525 | { |
@@ -497,7 +528,7 @@ void cache_purge(struct cache_detail *detail) | |||
497 | cache_flush(); | 528 | cache_flush(); |
498 | detail->flush_time = 1; | 529 | detail->flush_time = 1; |
499 | } | 530 | } |
500 | 531 | EXPORT_SYMBOL(cache_purge); | |
501 | 532 | ||
502 | 533 | ||
503 | /* | 534 | /* |
@@ -634,13 +665,13 @@ void cache_clean_deferred(void *owner) | |||
634 | /* | 665 | /* |
635 | * communicate with user-space | 666 | * communicate with user-space |
636 | * | 667 | * |
637 | * We have a magic /proc file - /proc/sunrpc/cache | 668 | * We have a magic /proc file - /proc/sunrpc/<cachename>/channel. |
638 | * On read, you get a full request, or block | 669 | * On read, you get a full request, or block. |
639 | * On write, an update request is processed | 670 | * On write, an update request is processed. |
640 | * Poll works if anything to read, and always allows write | 671 | * Poll works if anything to read, and always allows write. |
641 | * | 672 | * |
642 | * Implemented by linked list of requests. Each open file has | 673 | * Implemented by linked list of requests. Each open file has |
643 | * a ->private that also exists in this list. New request are added | 674 | * a ->private that also exists in this list. New requests are added |
644 | * to the end and may wakeup and preceding readers. | 675 | * to the end and may wakeup and preceding readers. |
645 | * New readers are added to the head. If, on read, an item is found with | 676 | * New readers are added to the head. If, on read, an item is found with |
646 | * CACHE_UPCALLING clear, we free it from the list. | 677 | * CACHE_UPCALLING clear, we free it from the list. |
@@ -963,6 +994,7 @@ void qword_add(char **bpp, int *lp, char *str) | |||
963 | *bpp = bp; | 994 | *bpp = bp; |
964 | *lp = len; | 995 | *lp = len; |
965 | } | 996 | } |
997 | EXPORT_SYMBOL(qword_add); | ||
966 | 998 | ||
967 | void qword_addhex(char **bpp, int *lp, char *buf, int blen) | 999 | void qword_addhex(char **bpp, int *lp, char *buf, int blen) |
968 | { | 1000 | { |
@@ -991,6 +1023,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen) | |||
991 | *bpp = bp; | 1023 | *bpp = bp; |
992 | *lp = len; | 1024 | *lp = len; |
993 | } | 1025 | } |
1026 | EXPORT_SYMBOL(qword_addhex); | ||
994 | 1027 | ||
995 | static void warn_no_listener(struct cache_detail *detail) | 1028 | static void warn_no_listener(struct cache_detail *detail) |
996 | { | 1029 | { |
@@ -1113,6 +1146,7 @@ int qword_get(char **bpp, char *dest, int bufsize) | |||
1113 | *dest = '\0'; | 1146 | *dest = '\0'; |
1114 | return len; | 1147 | return len; |
1115 | } | 1148 | } |
1149 | EXPORT_SYMBOL(qword_get); | ||
1116 | 1150 | ||
1117 | 1151 | ||
1118 | /* | 1152 | /* |
@@ -1244,18 +1278,18 @@ static ssize_t read_flush(struct file *file, char __user *buf, | |||
1244 | struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; | 1278 | struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; |
1245 | char tbuf[20]; | 1279 | char tbuf[20]; |
1246 | unsigned long p = *ppos; | 1280 | unsigned long p = *ppos; |
1247 | int len; | 1281 | size_t len; |
1248 | 1282 | ||
1249 | sprintf(tbuf, "%lu\n", cd->flush_time); | 1283 | sprintf(tbuf, "%lu\n", cd->flush_time); |
1250 | len = strlen(tbuf); | 1284 | len = strlen(tbuf); |
1251 | if (p >= len) | 1285 | if (p >= len) |
1252 | return 0; | 1286 | return 0; |
1253 | len -= p; | 1287 | len -= p; |
1254 | if (len > count) len = count; | 1288 | if (len > count) |
1289 | len = count; | ||
1255 | if (copy_to_user(buf, (void*)(tbuf+p), len)) | 1290 | if (copy_to_user(buf, (void*)(tbuf+p), len)) |
1256 | len = -EFAULT; | 1291 | return -EFAULT; |
1257 | else | 1292 | *ppos += len; |
1258 | *ppos += len; | ||
1259 | return len; | 1293 | return len; |
1260 | } | 1294 | } |
1261 | 1295 | ||
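
With this hunk cache_register() can fail (proc entry creation may return -ENOMEM) while cache_unregister() becomes void and only logs if entries are still in use. A hedged sketch of how a caller is expected to treat the new return values; the two cache_detail instances are hypothetical:

	extern struct cache_detail my_cache, my_other_cache;	/* hypothetical caches */

	static int example_caches_init(void)
	{
		int err;

		err = cache_register(&my_cache);
		if (err)
			return err;
		err = cache_register(&my_other_cache);
		if (err) {
			cache_unregister(&my_cache);	/* unwind the one that succeeded */
			return err;
		}
		return 0;
	}

	static void example_caches_exit(void)
	{
		cache_unregister(&my_other_cache);
		cache_unregister(&my_cache);
	}
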
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 74df2d358e61..5a16875f5ac8 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c | |||
@@ -33,7 +33,7 @@ struct proc_dir_entry *proc_net_rpc = NULL; | |||
33 | static int rpc_proc_show(struct seq_file *seq, void *v) { | 33 | static int rpc_proc_show(struct seq_file *seq, void *v) { |
34 | const struct rpc_stat *statp = seq->private; | 34 | const struct rpc_stat *statp = seq->private; |
35 | const struct rpc_program *prog = statp->program; | 35 | const struct rpc_program *prog = statp->program; |
36 | int i, j; | 36 | unsigned int i, j; |
37 | 37 | ||
38 | seq_printf(seq, | 38 | seq_printf(seq, |
39 | "net %u %u %u %u\n", | 39 | "net %u %u %u %u\n", |
@@ -81,7 +81,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { | |||
81 | const struct svc_program *prog = statp->program; | 81 | const struct svc_program *prog = statp->program; |
82 | const struct svc_procedure *proc; | 82 | const struct svc_procedure *proc; |
83 | const struct svc_version *vers; | 83 | const struct svc_version *vers; |
84 | int i, j; | 84 | unsigned int i, j; |
85 | 85 | ||
86 | seq_printf(seq, | 86 | seq_printf(seq, |
87 | "net %u %u %u %u\n", | 87 | "net %u %u %u %u\n", |
@@ -106,6 +106,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { | |||
106 | seq_putc(seq, '\n'); | 106 | seq_putc(seq, '\n'); |
107 | } | 107 | } |
108 | } | 108 | } |
109 | EXPORT_SYMBOL(svc_seq_show); | ||
109 | 110 | ||
110 | /** | 111 | /** |
111 | * rpc_alloc_iostats - allocate an rpc_iostats structure | 112 | * rpc_alloc_iostats - allocate an rpc_iostats structure |
@@ -255,12 +256,14 @@ svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) | |||
255 | { | 256 | { |
256 | return do_register(statp->program->pg_name, statp, fops); | 257 | return do_register(statp->program->pg_name, statp, fops); |
257 | } | 258 | } |
259 | EXPORT_SYMBOL(svc_proc_register); | ||
258 | 260 | ||
259 | void | 261 | void |
260 | svc_proc_unregister(const char *name) | 262 | svc_proc_unregister(const char *name) |
261 | { | 263 | { |
262 | remove_proc_entry(name, proc_net_rpc); | 264 | remove_proc_entry(name, proc_net_rpc); |
263 | } | 265 | } |
266 | EXPORT_SYMBOL(svc_proc_unregister); | ||
264 | 267 | ||
265 | void | 268 | void |
266 | rpc_proc_init(void) | 269 | rpc_proc_init(void) |
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 1a7e309d008b..843629f55763 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -22,48 +22,6 @@ | |||
22 | #include <linux/sunrpc/rpc_pipe_fs.h> | 22 | #include <linux/sunrpc/rpc_pipe_fs.h> |
23 | #include <linux/sunrpc/xprtsock.h> | 23 | #include <linux/sunrpc/xprtsock.h> |
24 | 24 | ||
25 | /* RPC server stuff */ | ||
26 | EXPORT_SYMBOL(svc_create); | ||
27 | EXPORT_SYMBOL(svc_create_thread); | ||
28 | EXPORT_SYMBOL(svc_create_pooled); | ||
29 | EXPORT_SYMBOL(svc_set_num_threads); | ||
30 | EXPORT_SYMBOL(svc_exit_thread); | ||
31 | EXPORT_SYMBOL(svc_destroy); | ||
32 | EXPORT_SYMBOL(svc_drop); | ||
33 | EXPORT_SYMBOL(svc_process); | ||
34 | EXPORT_SYMBOL(svc_recv); | ||
35 | EXPORT_SYMBOL(svc_wake_up); | ||
36 | EXPORT_SYMBOL(svc_makesock); | ||
37 | EXPORT_SYMBOL(svc_reserve); | ||
38 | EXPORT_SYMBOL(svc_auth_register); | ||
39 | EXPORT_SYMBOL(auth_domain_lookup); | ||
40 | EXPORT_SYMBOL(svc_authenticate); | ||
41 | EXPORT_SYMBOL(svc_set_client); | ||
42 | |||
43 | /* RPC statistics */ | ||
44 | #ifdef CONFIG_PROC_FS | ||
45 | EXPORT_SYMBOL(svc_proc_register); | ||
46 | EXPORT_SYMBOL(svc_proc_unregister); | ||
47 | EXPORT_SYMBOL(svc_seq_show); | ||
48 | #endif | ||
49 | |||
50 | /* caching... */ | ||
51 | EXPORT_SYMBOL(auth_domain_find); | ||
52 | EXPORT_SYMBOL(auth_domain_put); | ||
53 | EXPORT_SYMBOL(auth_unix_add_addr); | ||
54 | EXPORT_SYMBOL(auth_unix_forget_old); | ||
55 | EXPORT_SYMBOL(auth_unix_lookup); | ||
56 | EXPORT_SYMBOL(cache_check); | ||
57 | EXPORT_SYMBOL(cache_flush); | ||
58 | EXPORT_SYMBOL(cache_purge); | ||
59 | EXPORT_SYMBOL(cache_register); | ||
60 | EXPORT_SYMBOL(cache_unregister); | ||
61 | EXPORT_SYMBOL(qword_add); | ||
62 | EXPORT_SYMBOL(qword_addhex); | ||
63 | EXPORT_SYMBOL(qword_get); | ||
64 | EXPORT_SYMBOL(svcauth_unix_purge); | ||
65 | EXPORT_SYMBOL(unix_domain_find); | ||
66 | |||
67 | extern struct cache_detail ip_map_cache, unix_gid_cache; | 25 | extern struct cache_detail ip_map_cache, unix_gid_cache; |
68 | 26 | ||
69 | static int __init | 27 | static int __init |
@@ -85,7 +43,8 @@ init_sunrpc(void) | |||
85 | #endif | 43 | #endif |
86 | cache_register(&ip_map_cache); | 44 | cache_register(&ip_map_cache); |
87 | cache_register(&unix_gid_cache); | 45 | cache_register(&unix_gid_cache); |
88 | init_socket_xprt(); | 46 | svc_init_xprt_sock(); /* svc sock transport */ |
47 | init_socket_xprt(); /* clnt sock transport */ | ||
89 | rpcauth_init_module(); | 48 | rpcauth_init_module(); |
90 | out: | 49 | out: |
91 | return err; | 50 | return err; |
@@ -96,12 +55,11 @@ cleanup_sunrpc(void) | |||
96 | { | 55 | { |
97 | rpcauth_remove_module(); | 56 | rpcauth_remove_module(); |
98 | cleanup_socket_xprt(); | 57 | cleanup_socket_xprt(); |
58 | svc_cleanup_xprt_sock(); | ||
99 | unregister_rpc_pipefs(); | 59 | unregister_rpc_pipefs(); |
100 | rpc_destroy_mempool(); | 60 | rpc_destroy_mempool(); |
101 | if (cache_unregister(&ip_map_cache)) | 61 | cache_unregister(&ip_map_cache); |
102 | printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); | 62 | cache_unregister(&unix_gid_cache); |
103 | if (cache_unregister(&unix_gid_cache)) | ||
104 | printk(KERN_ERR "sunrpc: failed to unregister unix_gid cache\n"); | ||
105 | #ifdef RPC_DEBUG | 63 | #ifdef RPC_DEBUG |
106 | rpc_unregister_sysctl(); | 64 | rpc_unregister_sysctl(); |
107 | #endif | 65 | #endif |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 4ad5fbbb18b4..a290e1523297 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -364,7 +364,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
364 | void (*shutdown)(struct svc_serv *serv)) | 364 | void (*shutdown)(struct svc_serv *serv)) |
365 | { | 365 | { |
366 | struct svc_serv *serv; | 366 | struct svc_serv *serv; |
367 | int vers; | 367 | unsigned int vers; |
368 | unsigned int xdrsize; | 368 | unsigned int xdrsize; |
369 | unsigned int i; | 369 | unsigned int i; |
370 | 370 | ||
@@ -433,6 +433,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, | |||
433 | { | 433 | { |
434 | return __svc_create(prog, bufsize, /*npools*/1, shutdown); | 434 | return __svc_create(prog, bufsize, /*npools*/1, shutdown); |
435 | } | 435 | } |
436 | EXPORT_SYMBOL(svc_create); | ||
436 | 437 | ||
437 | struct svc_serv * | 438 | struct svc_serv * |
438 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | 439 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, |
@@ -452,6 +453,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | |||
452 | 453 | ||
453 | return serv; | 454 | return serv; |
454 | } | 455 | } |
456 | EXPORT_SYMBOL(svc_create_pooled); | ||
455 | 457 | ||
456 | /* | 458 | /* |
457 | * Destroy an RPC service. Should be called with the BKL held | 459 | * Destroy an RPC service. Should be called with the BKL held |
@@ -459,9 +461,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | |||
459 | void | 461 | void |
460 | svc_destroy(struct svc_serv *serv) | 462 | svc_destroy(struct svc_serv *serv) |
461 | { | 463 | { |
462 | struct svc_sock *svsk; | ||
463 | struct svc_sock *tmp; | ||
464 | |||
465 | dprintk("svc: svc_destroy(%s, %d)\n", | 464 | dprintk("svc: svc_destroy(%s, %d)\n", |
466 | serv->sv_program->pg_name, | 465 | serv->sv_program->pg_name, |
467 | serv->sv_nrthreads); | 466 | serv->sv_nrthreads); |
@@ -476,14 +475,12 @@ svc_destroy(struct svc_serv *serv) | |||
476 | 475 | ||
477 | del_timer_sync(&serv->sv_temptimer); | 476 | del_timer_sync(&serv->sv_temptimer); |
478 | 477 | ||
479 | list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list) | 478 | svc_close_all(&serv->sv_tempsocks); |
480 | svc_force_close_socket(svsk); | ||
481 | 479 | ||
482 | if (serv->sv_shutdown) | 480 | if (serv->sv_shutdown) |
483 | serv->sv_shutdown(serv); | 481 | serv->sv_shutdown(serv); |
484 | 482 | ||
485 | list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list) | 483 | svc_close_all(&serv->sv_permsocks); |
486 | svc_force_close_socket(svsk); | ||
487 | 484 | ||
488 | BUG_ON(!list_empty(&serv->sv_permsocks)); | 485 | BUG_ON(!list_empty(&serv->sv_permsocks)); |
489 | BUG_ON(!list_empty(&serv->sv_tempsocks)); | 486 | BUG_ON(!list_empty(&serv->sv_tempsocks)); |
@@ -498,6 +495,7 @@ svc_destroy(struct svc_serv *serv) | |||
498 | kfree(serv->sv_pools); | 495 | kfree(serv->sv_pools); |
499 | kfree(serv); | 496 | kfree(serv); |
500 | } | 497 | } |
498 | EXPORT_SYMBOL(svc_destroy); | ||
501 | 499 | ||
502 | /* | 500 | /* |
503 | * Allocate an RPC server's buffer space. | 501 | * Allocate an RPC server's buffer space. |
@@ -536,31 +534,17 @@ svc_release_buffer(struct svc_rqst *rqstp) | |||
536 | put_page(rqstp->rq_pages[i]); | 534 | put_page(rqstp->rq_pages[i]); |
537 | } | 535 | } |
538 | 536 | ||
539 | /* | 537 | struct svc_rqst * |
540 | * Create a thread in the given pool. Caller must hold BKL. | 538 | svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool) |
541 | * On a NUMA or SMP machine, with a multi-pool serv, the thread | ||
542 | * will be restricted to run on the cpus belonging to the pool. | ||
543 | */ | ||
544 | static int | ||
545 | __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | ||
546 | struct svc_pool *pool) | ||
547 | { | 539 | { |
548 | struct svc_rqst *rqstp; | 540 | struct svc_rqst *rqstp; |
549 | int error = -ENOMEM; | ||
550 | int have_oldmask = 0; | ||
551 | cpumask_t oldmask; | ||
552 | 541 | ||
553 | rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); | 542 | rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); |
554 | if (!rqstp) | 543 | if (!rqstp) |
555 | goto out; | 544 | goto out_enomem; |
556 | 545 | ||
557 | init_waitqueue_head(&rqstp->rq_wait); | 546 | init_waitqueue_head(&rqstp->rq_wait); |
558 | 547 | ||
559 | if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) | ||
560 | || !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) | ||
561 | || !svc_init_buffer(rqstp, serv->sv_max_mesg)) | ||
562 | goto out_thread; | ||
563 | |||
564 | serv->sv_nrthreads++; | 548 | serv->sv_nrthreads++; |
565 | spin_lock_bh(&pool->sp_lock); | 549 | spin_lock_bh(&pool->sp_lock); |
566 | pool->sp_nrthreads++; | 550 | pool->sp_nrthreads++; |
@@ -569,6 +553,45 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | |||
569 | rqstp->rq_server = serv; | 553 | rqstp->rq_server = serv; |
570 | rqstp->rq_pool = pool; | 554 | rqstp->rq_pool = pool; |
571 | 555 | ||
556 | rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); | ||
557 | if (!rqstp->rq_argp) | ||
558 | goto out_thread; | ||
559 | |||
560 | rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); | ||
561 | if (!rqstp->rq_resp) | ||
562 | goto out_thread; | ||
563 | |||
564 | if (!svc_init_buffer(rqstp, serv->sv_max_mesg)) | ||
565 | goto out_thread; | ||
566 | |||
567 | return rqstp; | ||
568 | out_thread: | ||
569 | svc_exit_thread(rqstp); | ||
570 | out_enomem: | ||
571 | return ERR_PTR(-ENOMEM); | ||
572 | } | ||
573 | EXPORT_SYMBOL(svc_prepare_thread); | ||
574 | |||
575 | /* | ||
576 | * Create a thread in the given pool. Caller must hold BKL. | ||
577 | * On a NUMA or SMP machine, with a multi-pool serv, the thread | ||
578 | * will be restricted to run on the cpus belonging to the pool. | ||
579 | */ | ||
580 | static int | ||
581 | __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | ||
582 | struct svc_pool *pool) | ||
583 | { | ||
584 | struct svc_rqst *rqstp; | ||
585 | int error = -ENOMEM; | ||
586 | int have_oldmask = 0; | ||
587 | cpumask_t oldmask; | ||
588 | |||
589 | rqstp = svc_prepare_thread(serv, pool); | ||
590 | if (IS_ERR(rqstp)) { | ||
591 | error = PTR_ERR(rqstp); | ||
592 | goto out; | ||
593 | } | ||
594 | |||
572 | if (serv->sv_nrpools > 1) | 595 | if (serv->sv_nrpools > 1) |
573 | have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); | 596 | have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); |
574 | 597 | ||
@@ -597,6 +620,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv) | |||
597 | { | 620 | { |
598 | return __svc_create_thread(func, serv, &serv->sv_pools[0]); | 621 | return __svc_create_thread(func, serv, &serv->sv_pools[0]); |
599 | } | 622 | } |
623 | EXPORT_SYMBOL(svc_create_thread); | ||
600 | 624 | ||
601 | /* | 625 | /* |
602 | * Choose a pool in which to create a new thread, for svc_set_num_threads | 626 | * Choose a pool in which to create a new thread, for svc_set_num_threads |
@@ -700,6 +724,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) | |||
700 | 724 | ||
701 | return error; | 725 | return error; |
702 | } | 726 | } |
727 | EXPORT_SYMBOL(svc_set_num_threads); | ||
703 | 728 | ||
704 | /* | 729 | /* |
705 | * Called from a server thread as it's exiting. Caller must hold BKL. | 730 | * Called from a server thread as it's exiting. Caller must hold BKL. |
@@ -726,6 +751,7 @@ svc_exit_thread(struct svc_rqst *rqstp) | |||
726 | if (serv) | 751 | if (serv) |
727 | svc_destroy(serv); | 752 | svc_destroy(serv); |
728 | } | 753 | } |
754 | EXPORT_SYMBOL(svc_exit_thread); | ||
729 | 755 | ||
730 | /* | 756 | /* |
731 | * Register an RPC service with the local portmapper. | 757 | * Register an RPC service with the local portmapper. |
@@ -737,7 +763,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) | |||
737 | { | 763 | { |
738 | struct svc_program *progp; | 764 | struct svc_program *progp; |
739 | unsigned long flags; | 765 | unsigned long flags; |
740 | int i, error = 0, dummy; | 766 | unsigned int i; |
767 | int error = 0, dummy; | ||
741 | 768 | ||
742 | if (!port) | 769 | if (!port) |
743 | clear_thread_flag(TIF_SIGPENDING); | 770 | clear_thread_flag(TIF_SIGPENDING); |
@@ -840,9 +867,9 @@ svc_process(struct svc_rqst *rqstp) | |||
840 | rqstp->rq_res.tail[0].iov_len = 0; | 867 | rqstp->rq_res.tail[0].iov_len = 0; |
841 | /* Will be turned off only in gss privacy case: */ | 868 | /* Will be turned off only in gss privacy case: */ |
842 | rqstp->rq_splice_ok = 1; | 869 | rqstp->rq_splice_ok = 1; |
843 | /* tcp needs a space for the record length... */ | 870 | |
844 | if (rqstp->rq_prot == IPPROTO_TCP) | 871 | /* Setup reply header */ |
845 | svc_putnl(resv, 0); | 872 | rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); |
846 | 873 | ||
847 | rqstp->rq_xid = svc_getu32(argv); | 874 | rqstp->rq_xid = svc_getu32(argv); |
848 | svc_putu32(resv, rqstp->rq_xid); | 875 | svc_putu32(resv, rqstp->rq_xid); |
@@ -1049,16 +1076,15 @@ err_bad: | |||
1049 | svc_putnl(resv, ntohl(rpc_stat)); | 1076 | svc_putnl(resv, ntohl(rpc_stat)); |
1050 | goto sendit; | 1077 | goto sendit; |
1051 | } | 1078 | } |
1079 | EXPORT_SYMBOL(svc_process); | ||
1052 | 1080 | ||
1053 | /* | 1081 | /* |
1054 | * Return (transport-specific) limit on the rpc payload. | 1082 | * Return (transport-specific) limit on the rpc payload. |
1055 | */ | 1083 | */ |
1056 | u32 svc_max_payload(const struct svc_rqst *rqstp) | 1084 | u32 svc_max_payload(const struct svc_rqst *rqstp) |
1057 | { | 1085 | { |
1058 | int max = RPCSVC_MAXPAYLOAD_TCP; | 1086 | u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload; |
1059 | 1087 | ||
1060 | if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM) | ||
1061 | max = RPCSVC_MAXPAYLOAD_UDP; | ||
1062 | if (rqstp->rq_server->sv_max_payload < max) | 1088 | if (rqstp->rq_server->sv_max_payload < max) |
1063 | max = rqstp->rq_server->sv_max_payload; | 1089 | max = rqstp->rq_server->sv_max_payload; |
1064 | return max; | 1090 | return max; |
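
svc_prepare_thread() splits the per-thread setup (rqstp allocation, argument/result buffers, pool bookkeeping) out of __svc_create_thread() so other thread-creation schemes can reuse it. A hedged caller sketch; my_service_fn and the kthread usage are assumptions, only svc_prepare_thread()'s contract (a valid rqstp or an ERR_PTR) and svc_exit_thread() as its undo come from the code above:

	static int my_service_fn(void *data);	/* hypothetical service loop */

	static int example_start_one_thread(struct svc_serv *serv, struct svc_pool *pool)
	{
		struct svc_rqst *rqstp;
		struct task_struct *task;

		rqstp = svc_prepare_thread(serv, pool);
		if (IS_ERR(rqstp))
			return PTR_ERR(rqstp);

		task = kthread_create(my_service_fn, rqstp, "example-svc");
		if (IS_ERR(task)) {
			svc_exit_thread(rqstp);		/* undo the per-thread bookkeeping */
			return PTR_ERR(task);
		}
		wake_up_process(task);
		return 0;
	}
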
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c new file mode 100644 index 000000000000..ea377e06afae --- /dev/null +++ b/net/sunrpc/svc_xprt.c | |||
@@ -0,0 +1,1055 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/svc_xprt.c | ||
3 | * | ||
4 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/sched.h> | ||
8 | #include <linux/errno.h> | ||
9 | #include <linux/fcntl.h> | ||
10 | #include <linux/net.h> | ||
11 | #include <linux/in.h> | ||
12 | #include <linux/inet.h> | ||
13 | #include <linux/udp.h> | ||
14 | #include <linux/tcp.h> | ||
15 | #include <linux/unistd.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/netdevice.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/file.h> | ||
20 | #include <linux/freezer.h> | ||
21 | #include <net/sock.h> | ||
22 | #include <net/checksum.h> | ||
23 | #include <net/ip.h> | ||
24 | #include <net/ipv6.h> | ||
25 | #include <net/tcp_states.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | #include <asm/ioctls.h> | ||
28 | |||
29 | #include <linux/sunrpc/types.h> | ||
30 | #include <linux/sunrpc/clnt.h> | ||
31 | #include <linux/sunrpc/xdr.h> | ||
32 | #include <linux/sunrpc/stats.h> | ||
33 | #include <linux/sunrpc/svc_xprt.h> | ||
34 | |||
35 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
36 | |||
37 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); | ||
38 | static int svc_deferred_recv(struct svc_rqst *rqstp); | ||
39 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | ||
40 | static void svc_age_temp_xprts(unsigned long closure); | ||
41 | |||
42 | /* apparently the "standard" is that clients close | ||
43 | * idle connections after 5 minutes, servers after | ||
44 | * 6 minutes | ||
45 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
46 | */ | ||
47 | static int svc_conn_age_period = 6*60; | ||
48 | |||
49 | /* List of registered transport classes */ | ||
50 | static DEFINE_SPINLOCK(svc_xprt_class_lock); | ||
51 | static LIST_HEAD(svc_xprt_class_list); | ||
52 | |||
53 | /* SMP locking strategy: | ||
54 | * | ||
55 | * svc_pool->sp_lock protects most of the fields of that pool. | ||
56 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
57 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
58 | * BKL protects svc_serv->sv_nrthread. | ||
59 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | ||
60 | * and the ->sk_info_authunix cache. | ||
61 | * | ||
62 | * The XPT_BUSY bit in xprt->xpt_flags prevents a transport being | ||
63 | * enqueued multiply. During normal transport processing this bit | ||
64 | * is set by svc_xprt_enqueue and cleared by svc_xprt_received. | ||
65 | * Providers should not manipulate this bit directly. | ||
66 | * | ||
67 | * Some flags can be set to certain values at any time | ||
68 | * providing that certain rules are followed: | ||
69 | * | ||
70 | * XPT_CONN, XPT_DATA: | ||
71 | * - Can be set or cleared at any time. | ||
72 | * - After a set, svc_xprt_enqueue must be called to enqueue | ||
73 | * the transport for processing. | ||
74 | * - After a clear, the transport must be read/accepted. | ||
75 | * If this succeeds, it must be set again. | ||
76 | * XPT_CLOSE: | ||
77 | * - Can set at any time. It is never cleared. | ||
78 | * XPT_DEAD: | ||
79 | * - Can only be set while XPT_BUSY is held which ensures | ||
80 | * that no other thread will be using the transport or will | ||
81 | * try to set XPT_DEAD. | ||
82 | */ | ||
83 | |||
84 | int svc_reg_xprt_class(struct svc_xprt_class *xcl) | ||
85 | { | ||
86 | struct svc_xprt_class *cl; | ||
87 | int res = -EEXIST; | ||
88 | |||
89 | dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name); | ||
90 | |||
91 | INIT_LIST_HEAD(&xcl->xcl_list); | ||
92 | spin_lock(&svc_xprt_class_lock); | ||
93 | /* Make sure there isn't already a class with the same name */ | ||
94 | list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) { | ||
95 | if (strcmp(xcl->xcl_name, cl->xcl_name) == 0) | ||
96 | goto out; | ||
97 | } | ||
98 | list_add_tail(&xcl->xcl_list, &svc_xprt_class_list); | ||
99 | res = 0; | ||
100 | out: | ||
101 | spin_unlock(&svc_xprt_class_lock); | ||
102 | return res; | ||
103 | } | ||
104 | EXPORT_SYMBOL_GPL(svc_reg_xprt_class); | ||
105 | |||
106 | void svc_unreg_xprt_class(struct svc_xprt_class *xcl) | ||
107 | { | ||
108 | dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name); | ||
109 | spin_lock(&svc_xprt_class_lock); | ||
110 | list_del_init(&xcl->xcl_list); | ||
111 | spin_unlock(&svc_xprt_class_lock); | ||
112 | } | ||
113 | EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); | ||
114 | |||
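As a usage sketch (not part of this patch), a transport module registers its class at load time and unregisters it on unload; the names and the ops table are assumptions supplied by the provider:

	/* Hypothetical provider module glue; my_xprt_ops is a provider-supplied
	 * struct svc_xprt_ops (xpo_create, xpo_recvfrom, xpo_sendto, ...).
	 */
	static struct svc_xprt_class my_xprt_class = {
		.xcl_name        = "myxprt",
		.xcl_owner       = THIS_MODULE,
		.xcl_ops         = &my_xprt_ops,
		.xcl_max_payload = 1 << 20,		/* illustrative payload limit */
	};

	static int __init my_xprt_mod_init(void)
	{
		return svc_reg_xprt_class(&my_xprt_class);
	}

	static void __exit my_xprt_mod_exit(void)
	{
		svc_unreg_xprt_class(&my_xprt_class);
	}

	module_init(my_xprt_mod_init);
	module_exit(my_xprt_mod_exit);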
115 | /* | ||
116 | * Format the transport list for printing | ||
117 | */ | ||
118 | int svc_print_xprts(char *buf, int maxlen) | ||
119 | { | ||
120 | struct list_head *le; | ||
121 | char tmpstr[80]; | ||
122 | int len = 0; | ||
123 | buf[0] = '\0'; | ||
124 | |||
125 | spin_lock(&svc_xprt_class_lock); | ||
126 | list_for_each(le, &svc_xprt_class_list) { | ||
127 | int slen; | ||
128 | struct svc_xprt_class *xcl = | ||
129 | list_entry(le, struct svc_xprt_class, xcl_list); | ||
130 | |||
131 | sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); | ||
132 | slen = strlen(tmpstr); | ||
133 | if (len + slen > maxlen) | ||
134 | break; | ||
135 | len += slen; | ||
136 | strcat(buf, tmpstr); | ||
137 | } | ||
138 | spin_unlock(&svc_xprt_class_lock); | ||
139 | |||
140 | return len; | ||
141 | } | ||
142 | |||
143 | static void svc_xprt_free(struct kref *kref) | ||
144 | { | ||
145 | struct svc_xprt *xprt = | ||
146 | container_of(kref, struct svc_xprt, xpt_ref); | ||
147 | struct module *owner = xprt->xpt_class->xcl_owner; | ||
148 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) | ||
149 | && xprt->xpt_auth_cache != NULL) | ||
150 | svcauth_unix_info_release(xprt->xpt_auth_cache); | ||
151 | xprt->xpt_ops->xpo_free(xprt); | ||
152 | module_put(owner); | ||
153 | } | ||
154 | |||
155 | void svc_xprt_put(struct svc_xprt *xprt) | ||
156 | { | ||
157 | kref_put(&xprt->xpt_ref, svc_xprt_free); | ||
158 | } | ||
159 | EXPORT_SYMBOL_GPL(svc_xprt_put); | ||
160 | |||
161 | /* | ||
162 | * Called by transport drivers to initialize the transport independent | ||
163 | * portion of the transport instance. | ||
164 | */ | ||
165 | void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, | ||
166 | struct svc_serv *serv) | ||
167 | { | ||
168 | memset(xprt, 0, sizeof(*xprt)); | ||
169 | xprt->xpt_class = xcl; | ||
170 | xprt->xpt_ops = xcl->xcl_ops; | ||
171 | kref_init(&xprt->xpt_ref); | ||
172 | xprt->xpt_server = serv; | ||
173 | INIT_LIST_HEAD(&xprt->xpt_list); | ||
174 | INIT_LIST_HEAD(&xprt->xpt_ready); | ||
175 | INIT_LIST_HEAD(&xprt->xpt_deferred); | ||
176 | mutex_init(&xprt->xpt_mutex); | ||
177 | spin_lock_init(&xprt->xpt_lock); | ||
178 | set_bit(XPT_BUSY, &xprt->xpt_flags); | ||
179 | } | ||
180 | EXPORT_SYMBOL_GPL(svc_xprt_init); | ||
181 | |||
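For illustration, a provider's xpo_create callback embeds a struct svc_xprt in its own per-transport structure and initializes it here; struct my_xprt and my_xprt_class are hypothetical:

	struct my_xprt {
		struct svc_xprt	xprt;		/* transport-independent part */
		/* provider-private state follows */
	};

	static struct svc_xprt *my_xpo_create(struct svc_serv *serv,
					      struct sockaddr *sa, int salen,
					      int flags)
	{
		struct my_xprt *new = kzalloc(sizeof(*new), GFP_KERNEL);

		if (!new)
			return ERR_PTR(-ENOMEM);
		/* svc_xprt_init() leaves XPT_BUSY set; svc_create_xprt() (or the
		 * accept path) clears it once the new xprt is ready for use. */
		svc_xprt_init(&my_xprt_class, &new->xprt, serv);
		/* ... bind/listen on sa here ... */
		return &new->xprt;
	}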
182 | int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | ||
183 | int flags) | ||
184 | { | ||
185 | struct svc_xprt_class *xcl; | ||
186 | struct sockaddr_in sin = { | ||
187 | .sin_family = AF_INET, | ||
188 | .sin_addr.s_addr = INADDR_ANY, | ||
189 | .sin_port = htons(port), | ||
190 | }; | ||
191 | dprintk("svc: creating transport %s[%d]\n", xprt_name, port); | ||
192 | spin_lock(&svc_xprt_class_lock); | ||
193 | list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { | ||
194 | struct svc_xprt *newxprt; | ||
195 | |||
196 | if (strcmp(xprt_name, xcl->xcl_name)) | ||
197 | continue; | ||
198 | |||
199 | if (!try_module_get(xcl->xcl_owner)) | ||
200 | goto err; | ||
201 | |||
202 | spin_unlock(&svc_xprt_class_lock); | ||
203 | newxprt = xcl->xcl_ops-> | ||
204 | xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin), | ||
205 | flags); | ||
206 | if (IS_ERR(newxprt)) { | ||
207 | module_put(xcl->xcl_owner); | ||
208 | return PTR_ERR(newxprt); | ||
209 | } | ||
210 | |||
211 | clear_bit(XPT_TEMP, &newxprt->xpt_flags); | ||
212 | spin_lock_bh(&serv->sv_lock); | ||
213 | list_add(&newxprt->xpt_list, &serv->sv_permsocks); | ||
214 | spin_unlock_bh(&serv->sv_lock); | ||
215 | clear_bit(XPT_BUSY, &newxprt->xpt_flags); | ||
216 | return svc_xprt_local_port(newxprt); | ||
217 | } | ||
218 | err: | ||
219 | spin_unlock(&svc_xprt_class_lock); | ||
220 | dprintk("svc: transport %s not found\n", xprt_name); | ||
221 | return -ENOENT; | ||
222 | } | ||
223 | EXPORT_SYMBOL_GPL(svc_create_xprt); | ||
224 | |||
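A service such as nfsd or lockd then creates its listeners by class name; a hedged fragment (the port and flags values are illustrative):

	static int my_service_start_listener(struct svc_serv *serv)
	{
		/* Returns the bound local port on success, or a negative errno
		 * (e.g. -ENOENT if no class named "tcp" has been registered). */
		int port = svc_create_xprt(serv, "tcp", 2049, 0 /* flags: illustrative */);

		if (port < 0)
			return port;
		dprintk("listening on port %d\n", port);
		return 0;
	}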
225 | /* | ||
226 | * Copy the local and remote xprt addresses to the rqstp structure | ||
227 | */ | ||
228 | void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt) | ||
229 | { | ||
230 | struct sockaddr *sin; | ||
231 | |||
232 | memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen); | ||
233 | rqstp->rq_addrlen = xprt->xpt_remotelen; | ||
234 | |||
235 | /* | ||
236 | * Destination address in request is needed for binding the | ||
237 | * source address in RPC replies/callbacks later. | ||
238 | */ | ||
239 | sin = (struct sockaddr *)&xprt->xpt_local; | ||
240 | switch (sin->sa_family) { | ||
241 | case AF_INET: | ||
242 | rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; | ||
243 | break; | ||
244 | case AF_INET6: | ||
245 | rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; | ||
246 | break; | ||
247 | } | ||
248 | } | ||
249 | EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs); | ||
250 | |||
251 | /** | ||
252 | * svc_print_addr - Format rq_addr field for printing | ||
253 | * @rqstp: svc_rqst struct containing address to print | ||
254 | * @buf: target buffer for formatted address | ||
255 | * @len: length of target buffer | ||
256 | * | ||
257 | */ | ||
258 | char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) | ||
259 | { | ||
260 | return __svc_print_addr(svc_addr(rqstp), buf, len); | ||
261 | } | ||
262 | EXPORT_SYMBOL_GPL(svc_print_addr); | ||
263 | |||
264 | /* | ||
265 | * Queue up an idle server thread. Must have pool->sp_lock held. | ||
266 | * Note: this is really a stack rather than a queue, so that we only | ||
267 | * use as many different threads as we need, and the rest don't pollute | ||
268 | * the cache. | ||
269 | */ | ||
270 | static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
271 | { | ||
272 | list_add(&rqstp->rq_list, &pool->sp_threads); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | ||
277 | */ | ||
278 | static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
279 | { | ||
280 | list_del(&rqstp->rq_list); | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Queue up a transport with data pending. If there are idle nfsd | ||
285 | * processes, wake 'em up. | ||
286 | * | ||
287 | */ | ||
288 | void svc_xprt_enqueue(struct svc_xprt *xprt) | ||
289 | { | ||
290 | struct svc_serv *serv = xprt->xpt_server; | ||
291 | struct svc_pool *pool; | ||
292 | struct svc_rqst *rqstp; | ||
293 | int cpu; | ||
294 | |||
295 | if (!(xprt->xpt_flags & | ||
296 | ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) | ||
297 | return; | ||
298 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) | ||
299 | return; | ||
300 | |||
301 | cpu = get_cpu(); | ||
302 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | ||
303 | put_cpu(); | ||
304 | |||
305 | spin_lock_bh(&pool->sp_lock); | ||
306 | |||
307 | if (!list_empty(&pool->sp_threads) && | ||
308 | !list_empty(&pool->sp_sockets)) | ||
309 | printk(KERN_ERR | ||
310 | "svc_xprt_enqueue: " | ||
311 | "threads and transports both waiting??\n"); | ||
312 | |||
313 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { | ||
314 | /* Don't enqueue dead transports */ | ||
315 | dprintk("svc: transport %p is dead, not enqueued\n", xprt); | ||
316 | goto out_unlock; | ||
317 | } | ||
318 | |||
319 | /* Mark transport as busy. It will remain in this state until | ||
320 | * the provider calls svc_xprt_received. We update XPT_BUSY | ||
321 | * atomically because it also guards against trying to enqueue | ||
322 | * the transport twice. | ||
323 | */ | ||
324 | if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { | ||
325 | /* Don't enqueue transport while already enqueued */ | ||
326 | dprintk("svc: transport %p busy, not enqueued\n", xprt); | ||
327 | goto out_unlock; | ||
328 | } | ||
329 | BUG_ON(xprt->xpt_pool != NULL); | ||
330 | xprt->xpt_pool = pool; | ||
331 | |||
332 | /* Handle pending connection */ | ||
333 | if (test_bit(XPT_CONN, &xprt->xpt_flags)) | ||
334 | goto process; | ||
335 | |||
336 | /* Handle close in-progress */ | ||
337 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | ||
338 | goto process; | ||
339 | |||
340 | /* Check if we have space to reply to a request */ | ||
341 | if (!xprt->xpt_ops->xpo_has_wspace(xprt)) { | ||
342 | /* Don't enqueue while not enough space for reply */ | ||
343 | dprintk("svc: no write space, transport %p not enqueued\n", | ||
344 | xprt); | ||
345 | xprt->xpt_pool = NULL; | ||
346 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
347 | goto out_unlock; | ||
348 | } | ||
349 | |||
350 | process: | ||
351 | if (!list_empty(&pool->sp_threads)) { | ||
352 | rqstp = list_entry(pool->sp_threads.next, | ||
353 | struct svc_rqst, | ||
354 | rq_list); | ||
355 | dprintk("svc: transport %p served by daemon %p\n", | ||
356 | xprt, rqstp); | ||
357 | svc_thread_dequeue(pool, rqstp); | ||
358 | if (rqstp->rq_xprt) | ||
359 | printk(KERN_ERR | ||
360 | "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", | ||
361 | rqstp, rqstp->rq_xprt); | ||
362 | rqstp->rq_xprt = xprt; | ||
363 | svc_xprt_get(xprt); | ||
364 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
365 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
366 | BUG_ON(xprt->xpt_pool != pool); | ||
367 | wake_up(&rqstp->rq_wait); | ||
368 | } else { | ||
369 | dprintk("svc: transport %p put into queue\n", xprt); | ||
370 | list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); | ||
371 | BUG_ON(xprt->xpt_pool != pool); | ||
372 | } | ||
373 | |||
374 | out_unlock: | ||
375 | spin_unlock_bh(&pool->sp_lock); | ||
376 | } | ||
377 | EXPORT_SYMBOL_GPL(svc_xprt_enqueue); | ||
378 | |||
379 | /* | ||
380 | * Dequeue the first transport. Must be called with the pool->sp_lock held. | ||
381 | */ | ||
382 | static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) | ||
383 | { | ||
384 | struct svc_xprt *xprt; | ||
385 | |||
386 | if (list_empty(&pool->sp_sockets)) | ||
387 | return NULL; | ||
388 | |||
389 | xprt = list_entry(pool->sp_sockets.next, | ||
390 | struct svc_xprt, xpt_ready); | ||
391 | list_del_init(&xprt->xpt_ready); | ||
392 | |||
393 | dprintk("svc: transport %p dequeued, inuse=%d\n", | ||
394 | xprt, atomic_read(&xprt->xpt_ref.refcount)); | ||
395 | |||
396 | return xprt; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * svc_xprt_received conditionally queues the transport for processing | ||
401 | * by another thread. The caller must hold the XPT_BUSY bit and must | ||
402 | * not thereafter touch transport data. | ||
403 | * | ||
404 | * Note: XPT_DATA only gets cleared when a read-attempt finds no (or | ||
405 | * insufficient) data. | ||
406 | */ | ||
407 | void svc_xprt_received(struct svc_xprt *xprt) | ||
408 | { | ||
409 | BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); | ||
410 | xprt->xpt_pool = NULL; | ||
411 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
412 | svc_xprt_enqueue(xprt); | ||
413 | } | ||
414 | EXPORT_SYMBOL_GPL(svc_xprt_received); | ||
415 | |||
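The intended calling pattern, seen from a provider's xpo_recvfrom, is roughly the following sketch (provider details are assumed):

	static int my_xpo_recvfrom(struct svc_rqst *rqstp)
	{
		struct svc_xprt *xprt = rqstp->rq_xprt;
		int len = 0;

		/* ... copy one request into rqstp->rq_arg, setting len ... */
		if (len <= 0)
			clear_bit(XPT_DATA, &xprt->xpt_flags);	/* nothing (more) to read */
		svc_xprt_received(xprt);	/* drop XPT_BUSY, possibly re-enqueue */
		return len;
	}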
416 | /** | ||
417 | * svc_reserve - change the space reserved for the reply to a request. | ||
418 | * @rqstp: The request in question | ||
419 | * @space: new max space to reserve | ||
420 | * | ||
421 | * Each request reserves some space on the output queue of the transport | ||
422 | * to make sure the reply fits. This function reduces that reserved | ||
423 | * space to be the amount of space used already, plus @space. | ||
424 | * | ||
425 | */ | ||
426 | void svc_reserve(struct svc_rqst *rqstp, int space) | ||
427 | { | ||
428 | space += rqstp->rq_res.head[0].iov_len; | ||
429 | |||
430 | if (space < rqstp->rq_reserved) { | ||
431 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
432 | atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); | ||
433 | rqstp->rq_reserved = space; | ||
434 | |||
435 | svc_xprt_enqueue(xprt); | ||
436 | } | ||
437 | } | ||
438 | EXPORT_SYMBOL(svc_reserve); | ||
439 | |||
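For example, a request handler that already knows its reply will be small can shrink the reservation early, releasing write space on the transport (the 512 is illustrative):

	svc_reserve(rqstp, 512);	/* keep 512 bytes beyond what head[0] already holds */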
440 | static void svc_xprt_release(struct svc_rqst *rqstp) | ||
441 | { | ||
442 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
443 | |||
444 | rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); | ||
445 | |||
446 | svc_free_res_pages(rqstp); | ||
447 | rqstp->rq_res.page_len = 0; | ||
448 | rqstp->rq_res.page_base = 0; | ||
449 | |||
450 | /* Reset response buffer and release | ||
451 | * the reservation. | ||
452 | * But first, check that enough space was reserved | ||
453 | * for the reply, otherwise we have a bug! | ||
454 | */ | ||
455 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | ||
456 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | ||
457 | rqstp->rq_reserved, | ||
458 | rqstp->rq_res.len); | ||
459 | |||
460 | rqstp->rq_res.head[0].iov_len = 0; | ||
461 | svc_reserve(rqstp, 0); | ||
462 | rqstp->rq_xprt = NULL; | ||
463 | |||
464 | svc_xprt_put(xprt); | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * External function to wake up a server waiting for data | ||
469 | * This really only makes sense for services like lockd | ||
470 | * which have exactly one thread anyway. | ||
471 | */ | ||
472 | void svc_wake_up(struct svc_serv *serv) | ||
473 | { | ||
474 | struct svc_rqst *rqstp; | ||
475 | unsigned int i; | ||
476 | struct svc_pool *pool; | ||
477 | |||
478 | for (i = 0; i < serv->sv_nrpools; i++) { | ||
479 | pool = &serv->sv_pools[i]; | ||
480 | |||
481 | spin_lock_bh(&pool->sp_lock); | ||
482 | if (!list_empty(&pool->sp_threads)) { | ||
483 | rqstp = list_entry(pool->sp_threads.next, | ||
484 | struct svc_rqst, | ||
485 | rq_list); | ||
486 | dprintk("svc: daemon %p woken up.\n", rqstp); | ||
487 | /* | ||
488 | svc_thread_dequeue(pool, rqstp); | ||
489 | rqstp->rq_xprt = NULL; | ||
490 | */ | ||
491 | wake_up(&rqstp->rq_wait); | ||
492 | } | ||
493 | spin_unlock_bh(&pool->sp_lock); | ||
494 | } | ||
495 | } | ||
496 | EXPORT_SYMBOL(svc_wake_up); | ||
497 | |||
498 | int svc_port_is_privileged(struct sockaddr *sin) | ||
499 | { | ||
500 | switch (sin->sa_family) { | ||
501 | case AF_INET: | ||
502 | return ntohs(((struct sockaddr_in *)sin)->sin_port) | ||
503 | < PROT_SOCK; | ||
504 | case AF_INET6: | ||
505 | return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) | ||
506 | < PROT_SOCK; | ||
507 | default: | ||
508 | return 0; | ||
509 | } | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * Make sure that we don't have too many active connections. If we | ||
514 | * have, something must be dropped. | ||
515 | * | ||
516 | * There's no point in trying to do random drop here for DoS | ||
517 | * prevention. The NFS client does one reconnect every 15 seconds. An | ||
518 | * attacker can easily beat that. | ||
519 | * | ||
520 | * The only somewhat efficient mechanism would be to drop old | ||
521 | * connections from the same IP first. But right now we don't even | ||
522 | * record the client IP in svc_sock. | ||
523 | */ | ||
524 | static void svc_check_conn_limits(struct svc_serv *serv) | ||
525 | { | ||
526 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | ||
527 | struct svc_xprt *xprt = NULL; | ||
528 | spin_lock_bh(&serv->sv_lock); | ||
529 | if (!list_empty(&serv->sv_tempsocks)) { | ||
530 | if (net_ratelimit()) { | ||
531 | /* Try to help the admin */ | ||
532 | printk(KERN_NOTICE "%s: too many open " | ||
533 | "connections, consider increasing the " | ||
534 | "number of nfsd threads\n", | ||
535 | serv->sv_name); | ||
536 | } | ||
537 | /* | ||
538 | * Always select the oldest connection. It's not fair, | ||
539 | * but so is life | ||
540 | */ | ||
541 | xprt = list_entry(serv->sv_tempsocks.prev, | ||
542 | struct svc_xprt, | ||
543 | xpt_list); | ||
544 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
545 | svc_xprt_get(xprt); | ||
546 | } | ||
547 | spin_unlock_bh(&serv->sv_lock); | ||
548 | |||
549 | if (xprt) { | ||
550 | svc_xprt_enqueue(xprt); | ||
551 | svc_xprt_put(xprt); | ||
552 | } | ||
553 | } | ||
554 | } | ||
555 | |||
556 | /* | ||
557 | * Receive the next request on any transport. This code is carefully | ||
558 | * organised not to touch any cachelines in the shared svc_serv | ||
559 | * structure, only cachelines in the local svc_pool. | ||
560 | */ | ||
561 | int svc_recv(struct svc_rqst *rqstp, long timeout) | ||
562 | { | ||
563 | struct svc_xprt *xprt = NULL; | ||
564 | struct svc_serv *serv = rqstp->rq_server; | ||
565 | struct svc_pool *pool = rqstp->rq_pool; | ||
566 | int len, i; | ||
567 | int pages; | ||
568 | struct xdr_buf *arg; | ||
569 | DECLARE_WAITQUEUE(wait, current); | ||
570 | |||
571 | dprintk("svc: server %p waiting for data (to = %ld)\n", | ||
572 | rqstp, timeout); | ||
573 | |||
574 | if (rqstp->rq_xprt) | ||
575 | printk(KERN_ERR | ||
576 | "svc_recv: service %p, transport not NULL!\n", | ||
577 | rqstp); | ||
578 | if (waitqueue_active(&rqstp->rq_wait)) | ||
579 | printk(KERN_ERR | ||
580 | "svc_recv: service %p, wait queue active!\n", | ||
581 | rqstp); | ||
582 | |||
583 | /* now allocate needed pages. If we get a failure, sleep briefly */ | ||
584 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | ||
585 | for (i = 0; i < pages ; i++) | ||
586 | while (rqstp->rq_pages[i] == NULL) { | ||
587 | struct page *p = alloc_page(GFP_KERNEL); | ||
588 | if (!p) { | ||
589 | int j = msecs_to_jiffies(500); | ||
590 | schedule_timeout_uninterruptible(j); | ||
591 | } | ||
592 | rqstp->rq_pages[i] = p; | ||
593 | } | ||
594 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | ||
595 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
596 | |||
597 | /* Make arg->head point to first page and arg->pages point to rest */ | ||
598 | arg = &rqstp->rq_arg; | ||
599 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | ||
600 | arg->head[0].iov_len = PAGE_SIZE; | ||
601 | arg->pages = rqstp->rq_pages + 1; | ||
602 | arg->page_base = 0; | ||
603 | /* save at least one page for response */ | ||
604 | arg->page_len = (pages-2)*PAGE_SIZE; | ||
605 | arg->len = (pages-1)*PAGE_SIZE; | ||
606 | arg->tail[0].iov_len = 0; | ||
607 | |||
608 | try_to_freeze(); | ||
609 | cond_resched(); | ||
610 | if (signalled()) | ||
611 | return -EINTR; | ||
612 | |||
613 | spin_lock_bh(&pool->sp_lock); | ||
614 | xprt = svc_xprt_dequeue(pool); | ||
615 | if (xprt) { | ||
616 | rqstp->rq_xprt = xprt; | ||
617 | svc_xprt_get(xprt); | ||
618 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
619 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
620 | } else { | ||
621 | /* No data pending. Go to sleep */ | ||
622 | svc_thread_enqueue(pool, rqstp); | ||
623 | |||
624 | /* | ||
625 | * We have to be able to interrupt this wait | ||
626 | * to bring down the daemons ... | ||
627 | */ | ||
628 | set_current_state(TASK_INTERRUPTIBLE); | ||
629 | add_wait_queue(&rqstp->rq_wait, &wait); | ||
630 | spin_unlock_bh(&pool->sp_lock); | ||
631 | |||
632 | schedule_timeout(timeout); | ||
633 | |||
634 | try_to_freeze(); | ||
635 | |||
636 | spin_lock_bh(&pool->sp_lock); | ||
637 | remove_wait_queue(&rqstp->rq_wait, &wait); | ||
638 | |||
639 | xprt = rqstp->rq_xprt; | ||
640 | if (!xprt) { | ||
641 | svc_thread_dequeue(pool, rqstp); | ||
642 | spin_unlock_bh(&pool->sp_lock); | ||
643 | dprintk("svc: server %p, no data yet\n", rqstp); | ||
644 | return signalled()? -EINTR : -EAGAIN; | ||
645 | } | ||
646 | } | ||
647 | spin_unlock_bh(&pool->sp_lock); | ||
648 | |||
649 | len = 0; | ||
650 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { | ||
651 | dprintk("svc_recv: found XPT_CLOSE\n"); | ||
652 | svc_delete_xprt(xprt); | ||
653 | } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { | ||
654 | struct svc_xprt *newxpt; | ||
655 | newxpt = xprt->xpt_ops->xpo_accept(xprt); | ||
656 | if (newxpt) { | ||
657 | /* | ||
658 | * We know this module_get will succeed because the | ||
659 | * listener holds a reference too | ||
660 | */ | ||
661 | __module_get(newxpt->xpt_class->xcl_owner); | ||
662 | svc_check_conn_limits(xprt->xpt_server); | ||
663 | spin_lock_bh(&serv->sv_lock); | ||
664 | set_bit(XPT_TEMP, &newxpt->xpt_flags); | ||
665 | list_add(&newxpt->xpt_list, &serv->sv_tempsocks); | ||
666 | serv->sv_tmpcnt++; | ||
667 | if (serv->sv_temptimer.function == NULL) { | ||
668 | /* setup timer to age temp transports */ | ||
669 | setup_timer(&serv->sv_temptimer, | ||
670 | svc_age_temp_xprts, | ||
671 | (unsigned long)serv); | ||
672 | mod_timer(&serv->sv_temptimer, | ||
673 | jiffies + svc_conn_age_period * HZ); | ||
674 | } | ||
675 | spin_unlock_bh(&serv->sv_lock); | ||
676 | svc_xprt_received(newxpt); | ||
677 | } | ||
678 | svc_xprt_received(xprt); | ||
679 | } else { | ||
680 | dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", | ||
681 | rqstp, pool->sp_id, xprt, | ||
682 | atomic_read(&xprt->xpt_ref.refcount)); | ||
683 | rqstp->rq_deferred = svc_deferred_dequeue(xprt); | ||
684 | if (rqstp->rq_deferred) { | ||
685 | svc_xprt_received(xprt); | ||
686 | len = svc_deferred_recv(rqstp); | ||
687 | } else | ||
688 | len = xprt->xpt_ops->xpo_recvfrom(rqstp); | ||
689 | dprintk("svc: got len=%d\n", len); | ||
690 | } | ||
691 | |||
692 | /* No data, incomplete (TCP) read, or accept() */ | ||
693 | if (len == 0 || len == -EAGAIN) { | ||
694 | rqstp->rq_res.len = 0; | ||
695 | svc_xprt_release(rqstp); | ||
696 | return -EAGAIN; | ||
697 | } | ||
698 | clear_bit(XPT_OLD, &xprt->xpt_flags); | ||
699 | |||
700 | rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); | ||
701 | rqstp->rq_chandle.defer = svc_defer; | ||
702 | |||
703 | if (serv->sv_stats) | ||
704 | serv->sv_stats->netcnt++; | ||
705 | return len; | ||
706 | } | ||
707 | EXPORT_SYMBOL(svc_recv); | ||
708 | |||
709 | /* | ||
710 | * Drop request | ||
711 | */ | ||
712 | void svc_drop(struct svc_rqst *rqstp) | ||
713 | { | ||
714 | dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); | ||
715 | svc_xprt_release(rqstp); | ||
716 | } | ||
717 | EXPORT_SYMBOL(svc_drop); | ||
718 | |||
719 | /* | ||
720 | * Return reply to client. | ||
721 | */ | ||
722 | int svc_send(struct svc_rqst *rqstp) | ||
723 | { | ||
724 | struct svc_xprt *xprt; | ||
725 | int len; | ||
726 | struct xdr_buf *xb; | ||
727 | |||
728 | xprt = rqstp->rq_xprt; | ||
729 | if (!xprt) | ||
730 | return -EFAULT; | ||
731 | |||
732 | /* release the receive skb before sending the reply */ | ||
733 | rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); | ||
734 | |||
735 | /* calculate over-all length */ | ||
736 | xb = &rqstp->rq_res; | ||
737 | xb->len = xb->head[0].iov_len + | ||
738 | xb->page_len + | ||
739 | xb->tail[0].iov_len; | ||
740 | |||
741 | /* Grab mutex to serialize outgoing data. */ | ||
742 | mutex_lock(&xprt->xpt_mutex); | ||
743 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) | ||
744 | len = -ENOTCONN; | ||
745 | else | ||
746 | len = xprt->xpt_ops->xpo_sendto(rqstp); | ||
747 | mutex_unlock(&xprt->xpt_mutex); | ||
748 | svc_xprt_release(rqstp); | ||
749 | |||
750 | if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) | ||
751 | return 0; | ||
752 | return len; | ||
753 | } | ||
754 | |||
755 | /* | ||
756 | * Timer function to close old temporary transports, using | ||
757 | * a mark-and-sweep algorithm. | ||
758 | */ | ||
759 | static void svc_age_temp_xprts(unsigned long closure) | ||
760 | { | ||
761 | struct svc_serv *serv = (struct svc_serv *)closure; | ||
762 | struct svc_xprt *xprt; | ||
763 | struct list_head *le, *next; | ||
764 | LIST_HEAD(to_be_aged); | ||
765 | |||
766 | dprintk("svc_age_temp_xprts\n"); | ||
767 | |||
768 | if (!spin_trylock_bh(&serv->sv_lock)) { | ||
769 | /* busy, try again 1 sec later */ | ||
770 | dprintk("svc_age_temp_xprts: busy\n"); | ||
771 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | ||
772 | return; | ||
773 | } | ||
774 | |||
775 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | ||
776 | xprt = list_entry(le, struct svc_xprt, xpt_list); | ||
777 | |||
778 | /* First time through, just mark it OLD. Second time | ||
779 | * through, close it. */ | ||
780 | if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags)) | ||
781 | continue; | ||
782 | if (atomic_read(&xprt->xpt_ref.refcount) > 1 | ||
783 | || test_bit(XPT_BUSY, &xprt->xpt_flags)) | ||
784 | continue; | ||
785 | svc_xprt_get(xprt); | ||
786 | list_move(le, &to_be_aged); | ||
787 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
788 | set_bit(XPT_DETACHED, &xprt->xpt_flags); | ||
789 | } | ||
790 | spin_unlock_bh(&serv->sv_lock); | ||
791 | |||
792 | while (!list_empty(&to_be_aged)) { | ||
793 | le = to_be_aged.next; | ||
794 | /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ | ||
795 | list_del_init(le); | ||
796 | xprt = list_entry(le, struct svc_xprt, xpt_list); | ||
797 | |||
798 | dprintk("queuing xprt %p for closing\n", xprt); | ||
799 | |||
800 | /* a thread will dequeue and close it soon */ | ||
801 | svc_xprt_enqueue(xprt); | ||
802 | svc_xprt_put(xprt); | ||
803 | } | ||
804 | |||
805 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * Remove a dead transport | ||
810 | */ | ||
811 | void svc_delete_xprt(struct svc_xprt *xprt) | ||
812 | { | ||
813 | struct svc_serv *serv = xprt->xpt_server; | ||
814 | |||
815 | dprintk("svc: svc_delete_xprt(%p)\n", xprt); | ||
816 | xprt->xpt_ops->xpo_detach(xprt); | ||
817 | |||
818 | spin_lock_bh(&serv->sv_lock); | ||
819 | if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) | ||
820 | list_del_init(&xprt->xpt_list); | ||
821 | /* | ||
822 | * We used to delete the transport from whichever list | ||
823 | * its sk_xprt.xpt_ready node was on, but we don't actually | ||
824 | * need to. This is because the only time we're called | ||
825 | * while still attached to a queue, the queue itself | ||
826 | * is about to be destroyed (in svc_destroy). | ||
827 | */ | ||
828 | if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) { | ||
829 | BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2); | ||
830 | if (test_bit(XPT_TEMP, &xprt->xpt_flags)) | ||
831 | serv->sv_tmpcnt--; | ||
832 | svc_xprt_put(xprt); | ||
833 | } | ||
834 | spin_unlock_bh(&serv->sv_lock); | ||
835 | } | ||
836 | |||
837 | void svc_close_xprt(struct svc_xprt *xprt) | ||
838 | { | ||
839 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
840 | if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) | ||
841 | /* someone else will have to effect the close */ | ||
842 | return; | ||
843 | |||
844 | svc_xprt_get(xprt); | ||
845 | svc_delete_xprt(xprt); | ||
846 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
847 | svc_xprt_put(xprt); | ||
848 | } | ||
849 | EXPORT_SYMBOL_GPL(svc_close_xprt); | ||
850 | |||
851 | void svc_close_all(struct list_head *xprt_list) | ||
852 | { | ||
853 | struct svc_xprt *xprt; | ||
854 | struct svc_xprt *tmp; | ||
855 | |||
856 | list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { | ||
857 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
858 | if (test_bit(XPT_BUSY, &xprt->xpt_flags)) { | ||
859 | /* Waiting to be processed, but no threads left, | ||
860 | * so just remove it from the waiting list. | ||
861 | */ | ||
862 | list_del_init(&xprt->xpt_ready); | ||
863 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
864 | } | ||
865 | svc_close_xprt(xprt); | ||
866 | } | ||
867 | } | ||
868 | |||
869 | /* | ||
870 | * Handle defer and revisit of requests | ||
871 | */ | ||
872 | |||
873 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | ||
874 | { | ||
875 | struct svc_deferred_req *dr = | ||
876 | container_of(dreq, struct svc_deferred_req, handle); | ||
877 | struct svc_xprt *xprt = dr->xprt; | ||
878 | |||
879 | if (too_many) { | ||
880 | svc_xprt_put(xprt); | ||
881 | kfree(dr); | ||
882 | return; | ||
883 | } | ||
884 | dprintk("revisit queued\n"); | ||
885 | dr->xprt = NULL; | ||
886 | spin_lock(&xprt->xpt_lock); | ||
887 | list_add(&dr->handle.recent, &xprt->xpt_deferred); | ||
888 | spin_unlock(&xprt->xpt_lock); | ||
889 | set_bit(XPT_DEFERRED, &xprt->xpt_flags); | ||
890 | svc_xprt_enqueue(xprt); | ||
891 | svc_xprt_put(xprt); | ||
892 | } | ||
893 | |||
894 | /* | ||
895 | * Save the request off for later processing. The request buffer looks | ||
896 | * like this: | ||
897 | * | ||
898 | * <xprt-header><rpc-header><rpc-pagelist><rpc-tail> | ||
899 | * | ||
900 | * This code can only handle requests that consist of an xprt-header | ||
901 | * and rpc-header. | ||
902 | */ | ||
903 | static struct cache_deferred_req *svc_defer(struct cache_req *req) | ||
904 | { | ||
905 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | ||
906 | struct svc_deferred_req *dr; | ||
907 | |||
908 | if (rqstp->rq_arg.page_len) | ||
909 | return NULL; /* if more than a page, give up FIXME */ | ||
910 | if (rqstp->rq_deferred) { | ||
911 | dr = rqstp->rq_deferred; | ||
912 | rqstp->rq_deferred = NULL; | ||
913 | } else { | ||
914 | size_t skip; | ||
915 | size_t size; | ||
916 | /* FIXME maybe discard if size too large */ | ||
917 | size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len; | ||
918 | dr = kmalloc(size, GFP_KERNEL); | ||
919 | if (dr == NULL) | ||
920 | return NULL; | ||
921 | |||
922 | dr->handle.owner = rqstp->rq_server; | ||
923 | dr->prot = rqstp->rq_prot; | ||
924 | memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); | ||
925 | dr->addrlen = rqstp->rq_addrlen; | ||
926 | dr->daddr = rqstp->rq_daddr; | ||
927 | dr->argslen = rqstp->rq_arg.len >> 2; | ||
928 | dr->xprt_hlen = rqstp->rq_xprt_hlen; | ||
929 | |||
930 | /* back up head to the start of the buffer and copy */ | ||
931 | skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | ||
932 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip, | ||
933 | dr->argslen << 2); | ||
934 | } | ||
935 | svc_xprt_get(rqstp->rq_xprt); | ||
936 | dr->xprt = rqstp->rq_xprt; | ||
937 | |||
938 | dr->handle.revisit = svc_revisit; | ||
939 | return &dr->handle; | ||
940 | } | ||
941 | |||
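Conceptually (not shown in this patch), the deferral path is driven from the sunrpc cache code: when a lookup needed by a request cannot complete immediately, the cache layer calls the handle installed by svc_recv():

	/* Illustrative call site in the cache code; dreq is NULL if the
	 * request could not be saved (e.g. rq_arg spans more than a page). */
	struct cache_deferred_req *dreq = rqstp->rq_chandle.defer(&rqstp->rq_chandle);

svc_defer() below copies the transport and RPC headers into a svc_deferred_req; when the cache entry is later filled in, svc_revisit() above moves it onto xpt_deferred and re-enqueues the transport, and svc_recv() replays it through svc_deferred_recv().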
942 | /* | ||
943 | * recv data from a deferred request into an active one | ||
944 | */ | ||
945 | static int svc_deferred_recv(struct svc_rqst *rqstp) | ||
946 | { | ||
947 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
948 | |||
949 | /* setup iov_base past transport header */ | ||
950 | rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2); | ||
951 | /* The iov_len does not include the transport header bytes */ | ||
952 | rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen; | ||
953 | rqstp->rq_arg.page_len = 0; | ||
954 | /* The rq_arg.len includes the transport header bytes */ | ||
955 | rqstp->rq_arg.len = dr->argslen<<2; | ||
956 | rqstp->rq_prot = dr->prot; | ||
957 | memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); | ||
958 | rqstp->rq_addrlen = dr->addrlen; | ||
959 | /* Save off transport header len in case we get deferred again */ | ||
960 | rqstp->rq_xprt_hlen = dr->xprt_hlen; | ||
961 | rqstp->rq_daddr = dr->daddr; | ||
962 | rqstp->rq_respages = rqstp->rq_pages; | ||
963 | return (dr->argslen<<2) - dr->xprt_hlen; | ||
964 | } | ||
965 | |||
966 | |||
967 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) | ||
968 | { | ||
969 | struct svc_deferred_req *dr = NULL; | ||
970 | |||
971 | if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags)) | ||
972 | return NULL; | ||
973 | spin_lock(&xprt->xpt_lock); | ||
974 | clear_bit(XPT_DEFERRED, &xprt->xpt_flags); | ||
975 | if (!list_empty(&xprt->xpt_deferred)) { | ||
976 | dr = list_entry(xprt->xpt_deferred.next, | ||
977 | struct svc_deferred_req, | ||
978 | handle.recent); | ||
979 | list_del_init(&dr->handle.recent); | ||
980 | set_bit(XPT_DEFERRED, &xprt->xpt_flags); | ||
981 | } | ||
982 | spin_unlock(&xprt->xpt_lock); | ||
983 | return dr; | ||
984 | } | ||
985 | |||
986 | /* | ||
987 | * Return the transport instance pointer for the endpoint accepting | ||
988 | * connections/peer traffic from the specified transport class, | ||
989 | * address family and port. | ||
990 | * | ||
991 | * Specifying 0 for the address family or port is effectively a | ||
992 | * wild-card, and will result in matching the first transport in the | ||
993 | * service's list that has a matching class name. | ||
994 | */ | ||
995 | struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, | ||
996 | int af, int port) | ||
997 | { | ||
998 | struct svc_xprt *xprt; | ||
999 | struct svc_xprt *found = NULL; | ||
1000 | |||
1001 | /* Sanity check the args */ | ||
1002 | if (!serv || !xcl_name) | ||
1003 | return found; | ||
1004 | |||
1005 | spin_lock_bh(&serv->sv_lock); | ||
1006 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { | ||
1007 | if (strcmp(xprt->xpt_class->xcl_name, xcl_name)) | ||
1008 | continue; | ||
1009 | if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) | ||
1010 | continue; | ||
1011 | if (port && port != svc_xprt_local_port(xprt)) | ||
1012 | continue; | ||
1013 | found = xprt; | ||
1014 | svc_xprt_get(xprt); | ||
1015 | break; | ||
1016 | } | ||
1017 | spin_unlock_bh(&serv->sv_lock); | ||
1018 | return found; | ||
1019 | } | ||
1020 | EXPORT_SYMBOL_GPL(svc_find_xprt); | ||
1021 | |||
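A hedged example of the wildcard lookup described above, finding whichever permanent "tcp" transport exists regardless of family or port:

	/* AF_UNSPEC (0) and port 0 act as wildcards. On success the xprt is
	 * returned with an extra reference that the caller must drop. */
	struct svc_xprt *xprt = svc_find_xprt(serv, "tcp", 0, 0);
	if (xprt) {
		dprintk("found tcp listener on port %d\n", svc_xprt_local_port(xprt));
		svc_xprt_put(xprt);
	}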
1022 | /* | ||
1023 | * Format a buffer with a list of the active transports. A zero for | ||
1024 | * the buflen parameter disables target buffer overflow checking. | ||
1025 | */ | ||
1026 | int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) | ||
1027 | { | ||
1028 | struct svc_xprt *xprt; | ||
1029 | char xprt_str[64]; | ||
1030 | int totlen = 0; | ||
1031 | int len; | ||
1032 | |||
1033 | /* Sanity check args */ | ||
1034 | if (!serv) | ||
1035 | return 0; | ||
1036 | |||
1037 | spin_lock_bh(&serv->sv_lock); | ||
1038 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { | ||
1039 | len = snprintf(xprt_str, sizeof(xprt_str), | ||
1040 | "%s %d\n", xprt->xpt_class->xcl_name, | ||
1041 | svc_xprt_local_port(xprt)); | ||
1042 | /* If the string was truncated, replace with error string */ | ||
1043 | if (len >= sizeof(xprt_str)) | ||
1044 | strcpy(xprt_str, "name-too-long\n"); | ||
1045 | /* Don't overflow buffer */ | ||
1046 | len = strlen(xprt_str); | ||
1047 | if (buflen && (len + totlen >= buflen)) | ||
1048 | break; | ||
1049 | strcpy(buf+totlen, xprt_str); | ||
1050 | totlen += len; | ||
1051 | } | ||
1052 | spin_unlock_bh(&serv->sv_lock); | ||
1053 | return totlen; | ||
1054 | } | ||
1055 | EXPORT_SYMBOL_GPL(svc_xprt_names); | ||
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index af7c5f05c6e1..8a73cbb16052 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c | |||
@@ -57,11 +57,13 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) | |||
57 | rqstp->rq_authop = aops; | 57 | rqstp->rq_authop = aops; |
58 | return aops->accept(rqstp, authp); | 58 | return aops->accept(rqstp, authp); |
59 | } | 59 | } |
60 | EXPORT_SYMBOL(svc_authenticate); | ||
60 | 61 | ||
61 | int svc_set_client(struct svc_rqst *rqstp) | 62 | int svc_set_client(struct svc_rqst *rqstp) |
62 | { | 63 | { |
63 | return rqstp->rq_authop->set_client(rqstp); | 64 | return rqstp->rq_authop->set_client(rqstp); |
64 | } | 65 | } |
66 | EXPORT_SYMBOL(svc_set_client); | ||
65 | 67 | ||
66 | /* A request, which was authenticated, has now executed. | 68 | /* A request, which was authenticated, has now executed. |
67 | * Time to finalise the credentials and verifier | 69 | * Time to finalise the credentials and verifier |
@@ -93,6 +95,7 @@ svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops) | |||
93 | spin_unlock(&authtab_lock); | 95 | spin_unlock(&authtab_lock); |
94 | return rv; | 96 | return rv; |
95 | } | 97 | } |
98 | EXPORT_SYMBOL(svc_auth_register); | ||
96 | 99 | ||
97 | void | 100 | void |
98 | svc_auth_unregister(rpc_authflavor_t flavor) | 101 | svc_auth_unregister(rpc_authflavor_t flavor) |
@@ -129,6 +132,7 @@ void auth_domain_put(struct auth_domain *dom) | |||
129 | spin_unlock(&auth_domain_lock); | 132 | spin_unlock(&auth_domain_lock); |
130 | } | 133 | } |
131 | } | 134 | } |
135 | EXPORT_SYMBOL(auth_domain_put); | ||
132 | 136 | ||
133 | struct auth_domain * | 137 | struct auth_domain * |
134 | auth_domain_lookup(char *name, struct auth_domain *new) | 138 | auth_domain_lookup(char *name, struct auth_domain *new) |
@@ -153,8 +157,10 @@ auth_domain_lookup(char *name, struct auth_domain *new) | |||
153 | spin_unlock(&auth_domain_lock); | 157 | spin_unlock(&auth_domain_lock); |
154 | return new; | 158 | return new; |
155 | } | 159 | } |
160 | EXPORT_SYMBOL(auth_domain_lookup); | ||
156 | 161 | ||
157 | struct auth_domain *auth_domain_find(char *name) | 162 | struct auth_domain *auth_domain_find(char *name) |
158 | { | 163 | { |
159 | return auth_domain_lookup(name, NULL); | 164 | return auth_domain_lookup(name, NULL); |
160 | } | 165 | } |
166 | EXPORT_SYMBOL(auth_domain_find); | ||
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 411479411b21..3c64051e4555 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c | |||
@@ -63,6 +63,7 @@ struct auth_domain *unix_domain_find(char *name) | |||
63 | rv = auth_domain_lookup(name, &new->h); | 63 | rv = auth_domain_lookup(name, &new->h); |
64 | } | 64 | } |
65 | } | 65 | } |
66 | EXPORT_SYMBOL(unix_domain_find); | ||
66 | 67 | ||
67 | static void svcauth_unix_domain_release(struct auth_domain *dom) | 68 | static void svcauth_unix_domain_release(struct auth_domain *dom) |
68 | { | 69 | { |
@@ -340,6 +341,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) | |||
340 | else | 341 | else |
341 | return -ENOMEM; | 342 | return -ENOMEM; |
342 | } | 343 | } |
344 | EXPORT_SYMBOL(auth_unix_add_addr); | ||
343 | 345 | ||
344 | int auth_unix_forget_old(struct auth_domain *dom) | 346 | int auth_unix_forget_old(struct auth_domain *dom) |
345 | { | 347 | { |
@@ -351,6 +353,7 @@ int auth_unix_forget_old(struct auth_domain *dom) | |||
351 | udom->addr_changes++; | 353 | udom->addr_changes++; |
352 | return 0; | 354 | return 0; |
353 | } | 355 | } |
356 | EXPORT_SYMBOL(auth_unix_forget_old); | ||
354 | 357 | ||
355 | struct auth_domain *auth_unix_lookup(struct in_addr addr) | 358 | struct auth_domain *auth_unix_lookup(struct in_addr addr) |
356 | { | 359 | { |
@@ -375,50 +378,56 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) | |||
375 | cache_put(&ipm->h, &ip_map_cache); | 378 | cache_put(&ipm->h, &ip_map_cache); |
376 | return rv; | 379 | return rv; |
377 | } | 380 | } |
381 | EXPORT_SYMBOL(auth_unix_lookup); | ||
378 | 382 | ||
379 | void svcauth_unix_purge(void) | 383 | void svcauth_unix_purge(void) |
380 | { | 384 | { |
381 | cache_purge(&ip_map_cache); | 385 | cache_purge(&ip_map_cache); |
382 | } | 386 | } |
387 | EXPORT_SYMBOL(svcauth_unix_purge); | ||
383 | 388 | ||
384 | static inline struct ip_map * | 389 | static inline struct ip_map * |
385 | ip_map_cached_get(struct svc_rqst *rqstp) | 390 | ip_map_cached_get(struct svc_rqst *rqstp) |
386 | { | 391 | { |
387 | struct ip_map *ipm; | 392 | struct ip_map *ipm = NULL; |
388 | struct svc_sock *svsk = rqstp->rq_sock; | 393 | struct svc_xprt *xprt = rqstp->rq_xprt; |
389 | spin_lock(&svsk->sk_lock); | 394 | |
390 | ipm = svsk->sk_info_authunix; | 395 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { |
391 | if (ipm != NULL) { | 396 | spin_lock(&xprt->xpt_lock); |
392 | if (!cache_valid(&ipm->h)) { | 397 | ipm = xprt->xpt_auth_cache; |
393 | /* | 398 | if (ipm != NULL) { |
394 | * The entry has been invalidated since it was | 399 | if (!cache_valid(&ipm->h)) { |
395 | * remembered, e.g. by a second mount from the | 400 | /* |
396 | * same IP address. | 401 | * The entry has been invalidated since it was |
397 | */ | 402 | * remembered, e.g. by a second mount from the |
398 | svsk->sk_info_authunix = NULL; | 403 | * same IP address. |
399 | spin_unlock(&svsk->sk_lock); | 404 | */ |
400 | cache_put(&ipm->h, &ip_map_cache); | 405 | xprt->xpt_auth_cache = NULL; |
401 | return NULL; | 406 | spin_unlock(&xprt->xpt_lock); |
407 | cache_put(&ipm->h, &ip_map_cache); | ||
408 | return NULL; | ||
409 | } | ||
410 | cache_get(&ipm->h); | ||
402 | } | 411 | } |
403 | cache_get(&ipm->h); | 412 | spin_unlock(&xprt->xpt_lock); |
404 | } | 413 | } |
405 | spin_unlock(&svsk->sk_lock); | ||
406 | return ipm; | 414 | return ipm; |
407 | } | 415 | } |
408 | 416 | ||
409 | static inline void | 417 | static inline void |
410 | ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) | 418 | ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) |
411 | { | 419 | { |
412 | struct svc_sock *svsk = rqstp->rq_sock; | 420 | struct svc_xprt *xprt = rqstp->rq_xprt; |
413 | 421 | ||
414 | spin_lock(&svsk->sk_lock); | 422 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { |
415 | if (svsk->sk_sock->type == SOCK_STREAM && | 423 | spin_lock(&xprt->xpt_lock); |
416 | svsk->sk_info_authunix == NULL) { | 424 | if (xprt->xpt_auth_cache == NULL) { |
417 | /* newly cached, keep the reference */ | 425 | /* newly cached, keep the reference */ |
418 | svsk->sk_info_authunix = ipm; | 426 | xprt->xpt_auth_cache = ipm; |
419 | ipm = NULL; | 427 | ipm = NULL; |
428 | } | ||
429 | spin_unlock(&xprt->xpt_lock); | ||
420 | } | 430 | } |
421 | spin_unlock(&svsk->sk_lock); | ||
422 | if (ipm) | 431 | if (ipm) |
423 | cache_put(&ipm->h, &ip_map_cache); | 432 | cache_put(&ipm->h, &ip_map_cache); |
424 | } | 433 | } |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c75bffeb89eb..1d3e5fcc2cc4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * The server scheduling algorithm does not always distribute the load | 6 | * The server scheduling algorithm does not always distribute the load |
7 | * evenly when servicing a single client. May need to modify the | 7 | * evenly when servicing a single client. May need to modify the |
8 | * svc_sock_enqueue procedure... | 8 | * svc_xprt_enqueue procedure... |
9 | * | 9 | * |
10 | * TCP support is largely untested and may be a little slow. The problem | 10 | * TCP support is largely untested and may be a little slow. The problem |
11 | * is that we currently do two separate recvfrom's, one for the 4-byte | 11 | * is that we currently do two separate recvfrom's, one for the 4-byte |
@@ -48,72 +48,40 @@ | |||
48 | #include <linux/sunrpc/svcsock.h> | 48 | #include <linux/sunrpc/svcsock.h> |
49 | #include <linux/sunrpc/stats.h> | 49 | #include <linux/sunrpc/stats.h> |
50 | 50 | ||
51 | /* SMP locking strategy: | 51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT |
52 | * | ||
53 | * svc_pool->sp_lock protects most of the fields of that pool. | ||
54 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
55 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
56 | * BKL protects svc_serv->sv_nrthread. | ||
57 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | ||
58 | * and the ->sk_info_authunix cache. | ||
59 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. | ||
60 | * | ||
61 | * Some flags can be set to certain values at any time | ||
62 | * providing that certain rules are followed: | ||
63 | * | ||
64 | * SK_CONN, SK_DATA, can be set or cleared at any time. | ||
65 | * after a set, svc_sock_enqueue must be called. | ||
66 | * after a clear, the socket must be read/accepted | ||
67 | * if this succeeds, it must be set again. | ||
68 | * SK_CLOSE can set at any time. It is never cleared. | ||
69 | * sk_inuse contains a bias of '1' until SK_DEAD is set. | ||
70 | * so when sk_inuse hits zero, we know the socket is dead | ||
71 | * and no-one is using it. | ||
72 | * SK_DEAD can only be set while SK_BUSY is held which ensures | ||
73 | * no other thread will be using the socket or will try to | ||
74 | * set SK_DEAD. | ||
75 | * | ||
76 | */ | ||
77 | |||
78 | #define RPCDBG_FACILITY RPCDBG_SVCSOCK | ||
79 | 52 | ||
80 | 53 | ||
81 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, | 54 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, |
82 | int *errp, int flags); | 55 | int *errp, int flags); |
83 | static void svc_delete_socket(struct svc_sock *svsk); | ||
84 | static void svc_udp_data_ready(struct sock *, int); | 56 | static void svc_udp_data_ready(struct sock *, int); |
85 | static int svc_udp_recvfrom(struct svc_rqst *); | 57 | static int svc_udp_recvfrom(struct svc_rqst *); |
86 | static int svc_udp_sendto(struct svc_rqst *); | 58 | static int svc_udp_sendto(struct svc_rqst *); |
87 | static void svc_close_socket(struct svc_sock *svsk); | 59 | static void svc_sock_detach(struct svc_xprt *); |
88 | 60 | static void svc_sock_free(struct svc_xprt *); | |
89 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); | ||
90 | static int svc_deferred_recv(struct svc_rqst *rqstp); | ||
91 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | ||
92 | |||
93 | /* apparently the "standard" is that clients close | ||
94 | * idle connections after 5 minutes, servers after | ||
95 | * 6 minutes | ||
96 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
97 | */ | ||
98 | static int svc_conn_age_period = 6*60; | ||
99 | 61 | ||
62 | static struct svc_xprt *svc_create_socket(struct svc_serv *, int, | ||
63 | struct sockaddr *, int, int); | ||
100 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 64 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
101 | static struct lock_class_key svc_key[2]; | 65 | static struct lock_class_key svc_key[2]; |
102 | static struct lock_class_key svc_slock_key[2]; | 66 | static struct lock_class_key svc_slock_key[2]; |
103 | 67 | ||
104 | static inline void svc_reclassify_socket(struct socket *sock) | 68 | static void svc_reclassify_socket(struct socket *sock) |
105 | { | 69 | { |
106 | struct sock *sk = sock->sk; | 70 | struct sock *sk = sock->sk; |
107 | BUG_ON(sock_owned_by_user(sk)); | 71 | BUG_ON(sock_owned_by_user(sk)); |
108 | switch (sk->sk_family) { | 72 | switch (sk->sk_family) { |
109 | case AF_INET: | 73 | case AF_INET: |
110 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", | 74 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", |
111 | &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); | 75 | &svc_slock_key[0], |
76 | "sk_xprt.xpt_lock-AF_INET-NFSD", | ||
77 | &svc_key[0]); | ||
112 | break; | 78 | break; |
113 | 79 | ||
114 | case AF_INET6: | 80 | case AF_INET6: |
115 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", | 81 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", |
116 | &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); | 82 | &svc_slock_key[1], |
83 | "sk_xprt.xpt_lock-AF_INET6-NFSD", | ||
84 | &svc_key[1]); | ||
117 | break; | 85 | break; |
118 | 86 | ||
119 | default: | 87 | default: |
@@ -121,81 +89,26 @@ static inline void svc_reclassify_socket(struct socket *sock) | |||
121 | } | 89 | } |
122 | } | 90 | } |
123 | #else | 91 | #else |
124 | static inline void svc_reclassify_socket(struct socket *sock) | 92 | static void svc_reclassify_socket(struct socket *sock) |
125 | { | 93 | { |
126 | } | 94 | } |
127 | #endif | 95 | #endif |
128 | 96 | ||
129 | static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len) | ||
130 | { | ||
131 | switch (addr->sa_family) { | ||
132 | case AF_INET: | ||
133 | snprintf(buf, len, "%u.%u.%u.%u, port=%u", | ||
134 | NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), | ||
135 | ntohs(((struct sockaddr_in *) addr)->sin_port)); | ||
136 | break; | ||
137 | |||
138 | case AF_INET6: | ||
139 | snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", | ||
140 | NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), | ||
141 | ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); | ||
142 | break; | ||
143 | |||
144 | default: | ||
145 | snprintf(buf, len, "unknown address type: %d", addr->sa_family); | ||
146 | break; | ||
147 | } | ||
148 | return buf; | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * svc_print_addr - Format rq_addr field for printing | ||
153 | * @rqstp: svc_rqst struct containing address to print | ||
154 | * @buf: target buffer for formatted address | ||
155 | * @len: length of target buffer | ||
156 | * | ||
157 | */ | ||
158 | char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) | ||
159 | { | ||
160 | return __svc_print_addr(svc_addr(rqstp), buf, len); | ||
161 | } | ||
162 | EXPORT_SYMBOL_GPL(svc_print_addr); | ||
163 | |||
164 | /* | ||
165 | * Queue up an idle server thread. Must have pool->sp_lock held. | ||
166 | * Note: this is really a stack rather than a queue, so that we only | ||
167 | * use as many different threads as we need, and the rest don't pollute | ||
168 | * the cache. | ||
169 | */ | ||
170 | static inline void | ||
171 | svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
172 | { | ||
173 | list_add(&rqstp->rq_list, &pool->sp_threads); | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | ||
178 | */ | ||
179 | static inline void | ||
180 | svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
181 | { | ||
182 | list_del(&rqstp->rq_list); | ||
183 | } | ||
184 | |||
185 | /* | 97 | /* |
186 | * Release an skbuff after use | 98 | * Release an skbuff after use |
187 | */ | 99 | */ |
188 | static inline void | 100 | static void svc_release_skb(struct svc_rqst *rqstp) |
189 | svc_release_skb(struct svc_rqst *rqstp) | ||
190 | { | 101 | { |
191 | struct sk_buff *skb = rqstp->rq_skbuff; | 102 | struct sk_buff *skb = rqstp->rq_xprt_ctxt; |
192 | struct svc_deferred_req *dr = rqstp->rq_deferred; | 103 | struct svc_deferred_req *dr = rqstp->rq_deferred; |
193 | 104 | ||
194 | if (skb) { | 105 | if (skb) { |
195 | rqstp->rq_skbuff = NULL; | 106 | struct svc_sock *svsk = |
107 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
108 | rqstp->rq_xprt_ctxt = NULL; | ||
196 | 109 | ||
197 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); | 110 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); |
198 | skb_free_datagram(rqstp->rq_sock->sk_sk, skb); | 111 | skb_free_datagram(svsk->sk_sk, skb); |
199 | } | 112 | } |
200 | if (dr) { | 113 | if (dr) { |
201 | rqstp->rq_deferred = NULL; | 114 | rqstp->rq_deferred = NULL; |
@@ -203,253 +116,6 @@ svc_release_skb(struct svc_rqst *rqstp) | |||
203 | } | 116 | } |
204 | } | 117 | } |
205 | 118 | ||
206 | /* | ||
207 | * Any space to write? | ||
208 | */ | ||
209 | static inline unsigned long | ||
210 | svc_sock_wspace(struct svc_sock *svsk) | ||
211 | { | ||
212 | int wspace; | ||
213 | |||
214 | if (svsk->sk_sock->type == SOCK_STREAM) | ||
215 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
216 | else | ||
217 | wspace = sock_wspace(svsk->sk_sk); | ||
218 | |||
219 | return wspace; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Queue up a socket with data pending. If there are idle nfsd | ||
224 | * processes, wake 'em up. | ||
225 | * | ||
226 | */ | ||
227 | static void | ||
228 | svc_sock_enqueue(struct svc_sock *svsk) | ||
229 | { | ||
230 | struct svc_serv *serv = svsk->sk_server; | ||
231 | struct svc_pool *pool; | ||
232 | struct svc_rqst *rqstp; | ||
233 | int cpu; | ||
234 | |||
235 | if (!(svsk->sk_flags & | ||
236 | ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) )) | ||
237 | return; | ||
238 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
239 | return; | ||
240 | |||
241 | cpu = get_cpu(); | ||
242 | pool = svc_pool_for_cpu(svsk->sk_server, cpu); | ||
243 | put_cpu(); | ||
244 | |||
245 | spin_lock_bh(&pool->sp_lock); | ||
246 | |||
247 | if (!list_empty(&pool->sp_threads) && | ||
248 | !list_empty(&pool->sp_sockets)) | ||
249 | printk(KERN_ERR | ||
250 | "svc_sock_enqueue: threads and sockets both waiting??\n"); | ||
251 | |||
252 | if (test_bit(SK_DEAD, &svsk->sk_flags)) { | ||
253 | /* Don't enqueue dead sockets */ | ||
254 | dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk); | ||
255 | goto out_unlock; | ||
256 | } | ||
257 | |||
258 | /* Mark socket as busy. It will remain in this state until the | ||
259 | * server has processed all pending data and put the socket back | ||
260 | * on the idle list. We update SK_BUSY atomically because | ||
261 | * it also guards against trying to enqueue the svc_sock twice. | ||
262 | */ | ||
263 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) { | ||
264 | /* Don't enqueue socket while already enqueued */ | ||
265 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); | ||
266 | goto out_unlock; | ||
267 | } | ||
268 | BUG_ON(svsk->sk_pool != NULL); | ||
269 | svsk->sk_pool = pool; | ||
270 | |||
271 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
272 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2 | ||
273 | > svc_sock_wspace(svsk)) | ||
274 | && !test_bit(SK_CLOSE, &svsk->sk_flags) | ||
275 | && !test_bit(SK_CONN, &svsk->sk_flags)) { | ||
276 | /* Don't enqueue while not enough space for reply */ | ||
277 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", | ||
278 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg, | ||
279 | svc_sock_wspace(svsk)); | ||
280 | svsk->sk_pool = NULL; | ||
281 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
282 | goto out_unlock; | ||
283 | } | ||
284 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
285 | |||
286 | |||
287 | if (!list_empty(&pool->sp_threads)) { | ||
288 | rqstp = list_entry(pool->sp_threads.next, | ||
289 | struct svc_rqst, | ||
290 | rq_list); | ||
291 | dprintk("svc: socket %p served by daemon %p\n", | ||
292 | svsk->sk_sk, rqstp); | ||
293 | svc_thread_dequeue(pool, rqstp); | ||
294 | if (rqstp->rq_sock) | ||
295 | printk(KERN_ERR | ||
296 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", | ||
297 | rqstp, rqstp->rq_sock); | ||
298 | rqstp->rq_sock = svsk; | ||
299 | atomic_inc(&svsk->sk_inuse); | ||
300 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
301 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | ||
302 | BUG_ON(svsk->sk_pool != pool); | ||
303 | wake_up(&rqstp->rq_wait); | ||
304 | } else { | ||
305 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); | ||
306 | list_add_tail(&svsk->sk_ready, &pool->sp_sockets); | ||
307 | BUG_ON(svsk->sk_pool != pool); | ||
308 | } | ||
309 | |||
310 | out_unlock: | ||
311 | spin_unlock_bh(&pool->sp_lock); | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | * Dequeue the first socket. Must be called with the pool->sp_lock held. | ||
316 | */ | ||
317 | static inline struct svc_sock * | ||
318 | svc_sock_dequeue(struct svc_pool *pool) | ||
319 | { | ||
320 | struct svc_sock *svsk; | ||
321 | |||
322 | if (list_empty(&pool->sp_sockets)) | ||
323 | return NULL; | ||
324 | |||
325 | svsk = list_entry(pool->sp_sockets.next, | ||
326 | struct svc_sock, sk_ready); | ||
327 | list_del_init(&svsk->sk_ready); | ||
328 | |||
329 | dprintk("svc: socket %p dequeued, inuse=%d\n", | ||
330 | svsk->sk_sk, atomic_read(&svsk->sk_inuse)); | ||
331 | |||
332 | return svsk; | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Having read something from a socket, check whether it | ||
337 | * needs to be re-enqueued. | ||
338 | * Note: SK_DATA only gets cleared when a read-attempt finds | ||
339 | * no (or insufficient) data. | ||
340 | */ | ||
341 | static inline void | ||
342 | svc_sock_received(struct svc_sock *svsk) | ||
343 | { | ||
344 | svsk->sk_pool = NULL; | ||
345 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
346 | svc_sock_enqueue(svsk); | ||
347 | } | ||
348 | |||
349 | |||
350 | /** | ||
351 | * svc_reserve - change the space reserved for the reply to a request. | ||
352 | * @rqstp: The request in question | ||
353 | * @space: new max space to reserve | ||
354 | * | ||
355 | * Each request reserves some space on the output queue of the socket | ||
356 | * to make sure the reply fits. This function reduces that reserved | ||
357 | * space to be the amount of space used already, plus @space. | ||
358 | * | ||
359 | */ | ||
360 | void svc_reserve(struct svc_rqst *rqstp, int space) | ||
361 | { | ||
362 | space += rqstp->rq_res.head[0].iov_len; | ||
363 | |||
364 | if (space < rqstp->rq_reserved) { | ||
365 | struct svc_sock *svsk = rqstp->rq_sock; | ||
366 | atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved); | ||
367 | rqstp->rq_reserved = space; | ||
368 | |||
369 | svc_sock_enqueue(svsk); | ||
370 | } | ||
371 | } | ||
372 | |||
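svc_reserve() shrinks a request's reservation to "bytes already written into the reply head, plus the caller's new estimate", and returns the difference to the socket's reserved-space counter so further requests can be admitted sooner. A small standalone illustration of that bookkeeping, with hypothetical field names mirroring rq_reserved and sk_reserved:

    #include <stdio.h>

    struct fake_sock { long reserved; };          /* stands in for sk_reserved */
    struct fake_rqst { long reserved; long head_len; struct fake_sock *sock; };

    /* Sketch of svc_reserve(): never grows the reservation, only shrinks it. */
    static void reserve(struct fake_rqst *rq, long space)
    {
            space += rq->head_len;                /* space actually used so far */
            if (space < rq->reserved) {
                    rq->sock->reserved -= rq->reserved - space;
                    rq->reserved = space;
            }
    }

    int main(void)
    {
            struct fake_sock sk = { .reserved = 32768 };
            struct fake_rqst rq = { .reserved = 32768, .head_len = 200, .sock = &sk };

            reserve(&rq, 4096);   /* reply will need at most ~4 KB more */
            printf("rq.reserved=%ld sk.reserved=%ld\n", rq.reserved, sk.reserved);
            return 0;
    }
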
373 | /* | ||
374 | * Release a socket after use. | ||
375 | */ | ||
376 | static inline void | ||
377 | svc_sock_put(struct svc_sock *svsk) | ||
378 | { | ||
379 | if (atomic_dec_and_test(&svsk->sk_inuse)) { | ||
380 | BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); | ||
381 | |||
382 | dprintk("svc: releasing dead socket\n"); | ||
383 | if (svsk->sk_sock->file) | ||
384 | sockfd_put(svsk->sk_sock); | ||
385 | else | ||
386 | sock_release(svsk->sk_sock); | ||
387 | if (svsk->sk_info_authunix != NULL) | ||
388 | svcauth_unix_info_release(svsk->sk_info_authunix); | ||
389 | kfree(svsk); | ||
390 | } | ||
391 | } | ||
392 | |||
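svc_sock_put() above is the classic "drop a reference, free on the last one" pattern: the atomic decrement-and-test returns true only for the caller that takes the count to zero, and only that caller may release the socket. A userspace sketch of the same idea using C11 atomics (the names here are invented for illustration):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct obj {
            atomic_int refs;
            /* ... payload ... */
    };

    static struct obj *obj_new(void)
    {
            struct obj *o = malloc(sizeof(*o));
            if (!o)
                    abort();
            atomic_init(&o->refs, 1);     /* creator holds one reference */
            return o;
    }

    static void obj_get(struct obj *o) { atomic_fetch_add(&o->refs, 1); }

    /* Like svc_sock_put(): whoever drops the count to zero does the cleanup. */
    static void obj_put(struct obj *o)
    {
            if (atomic_fetch_sub(&o->refs, 1) == 1) {
                    printf("last reference dropped, freeing\n");
                    free(o);
            }
    }

    int main(void)
    {
            struct obj *o = obj_new();
            obj_get(o);           /* e.g. a request now holds the socket */
            obj_put(o);           /* request done */
            obj_put(o);           /* owner done: this call frees */
            return 0;
    }
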
393 | static void | ||
394 | svc_sock_release(struct svc_rqst *rqstp) | ||
395 | { | ||
396 | struct svc_sock *svsk = rqstp->rq_sock; | ||
397 | |||
398 | svc_release_skb(rqstp); | ||
399 | |||
400 | svc_free_res_pages(rqstp); | ||
401 | rqstp->rq_res.page_len = 0; | ||
402 | rqstp->rq_res.page_base = 0; | ||
403 | |||
404 | |||
405 | /* Reset response buffer and release | ||
406 | * the reservation. | ||
407 | * But first, check that enough space was reserved | ||
408 | * for the reply, otherwise we have a bug! | ||
409 | */ | ||
410 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | ||
411 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | ||
412 | rqstp->rq_reserved, | ||
413 | rqstp->rq_res.len); | ||
414 | |||
415 | rqstp->rq_res.head[0].iov_len = 0; | ||
416 | svc_reserve(rqstp, 0); | ||
417 | rqstp->rq_sock = NULL; | ||
418 | |||
419 | svc_sock_put(svsk); | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * External function to wake up a server waiting for data | ||
424 | * This really only makes sense for services like lockd | ||
425 | * which have exactly one thread anyway. | ||
426 | */ | ||
427 | void | ||
428 | svc_wake_up(struct svc_serv *serv) | ||
429 | { | ||
430 | struct svc_rqst *rqstp; | ||
431 | unsigned int i; | ||
432 | struct svc_pool *pool; | ||
433 | |||
434 | for (i = 0; i < serv->sv_nrpools; i++) { | ||
435 | pool = &serv->sv_pools[i]; | ||
436 | |||
437 | spin_lock_bh(&pool->sp_lock); | ||
438 | if (!list_empty(&pool->sp_threads)) { | ||
439 | rqstp = list_entry(pool->sp_threads.next, | ||
440 | struct svc_rqst, | ||
441 | rq_list); | ||
442 | dprintk("svc: daemon %p woken up.\n", rqstp); | ||
443 | /* | ||
444 | svc_thread_dequeue(pool, rqstp); | ||
445 | rqstp->rq_sock = NULL; | ||
446 | */ | ||
447 | wake_up(&rqstp->rq_wait); | ||
448 | } | ||
449 | spin_unlock_bh(&pool->sp_lock); | ||
450 | } | ||
451 | } | ||
452 | |||
453 | union svc_pktinfo_u { | 119 | union svc_pktinfo_u { |
454 | struct in_pktinfo pkti; | 120 | struct in_pktinfo pkti; |
455 | struct in6_pktinfo pkti6; | 121 | struct in6_pktinfo pkti6; |
@@ -459,7 +125,9 @@ union svc_pktinfo_u { | |||
459 | 125 | ||
460 | static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) | 126 | static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) |
461 | { | 127 | { |
462 | switch (rqstp->rq_sock->sk_sk->sk_family) { | 128 | struct svc_sock *svsk = |
129 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
130 | switch (svsk->sk_sk->sk_family) { | ||
463 | case AF_INET: { | 131 | case AF_INET: { |
464 | struct in_pktinfo *pki = CMSG_DATA(cmh); | 132 | struct in_pktinfo *pki = CMSG_DATA(cmh); |
465 | 133 | ||
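The hunk above shows the pattern this patch applies throughout svcsock.c: the transport-specific svc_sock now embeds a generic svc_xprt, generic code passes around the svc_xprt pointer, and the socket code recovers its own structure with container_of(). A standalone demonstration of how container_of() works (a simplified version of the standard macro; the struct names here are invented):

    #include <stddef.h>
    #include <stdio.h>

    /* Simplified form of the kernel macro: given a pointer to a member,
     * step back to the start of the enclosing structure. */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct xprt  { int flags; };                  /* generic part */
    struct sockx { int fd; struct xprt xprt; };   /* transport embeds it */

    static void generic_code(struct xprt *x)
    {
            /* recover the embedding structure, as svc_sendto() now does */
            struct sockx *s = container_of(x, struct sockx, xprt);
            printf("fd = %d\n", s->fd);
    }

    int main(void)
    {
            struct sockx s = { .fd = 7, .xprt = { .flags = 0 } };
            generic_code(&s.xprt);
            return 0;
    }
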
@@ -489,10 +157,10 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) | |||
489 | /* | 157 | /* |
490 | * Generic sendto routine | 158 | * Generic sendto routine |
491 | */ | 159 | */ |
492 | static int | 160 | static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) |
493 | svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | ||
494 | { | 161 | { |
495 | struct svc_sock *svsk = rqstp->rq_sock; | 162 | struct svc_sock *svsk = |
163 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
496 | struct socket *sock = svsk->sk_sock; | 164 | struct socket *sock = svsk->sk_sock; |
497 | int slen; | 165 | int slen; |
498 | union { | 166 | union { |
@@ -565,7 +233,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | |||
565 | } | 233 | } |
566 | out: | 234 | out: |
567 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", | 235 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", |
568 | rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, | 236 | svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, |
569 | xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); | 237 | xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); |
570 | 238 | ||
571 | return len; | 239 | return len; |
@@ -602,7 +270,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
602 | if (!serv) | 270 | if (!serv) |
603 | return 0; | 271 | return 0; |
604 | spin_lock_bh(&serv->sv_lock); | 272 | spin_lock_bh(&serv->sv_lock); |
605 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { | 273 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { |
606 | int onelen = one_sock_name(buf+len, svsk); | 274 | int onelen = one_sock_name(buf+len, svsk); |
607 | if (toclose && strcmp(toclose, buf+len) == 0) | 275 | if (toclose && strcmp(toclose, buf+len) == 0) |
608 | closesk = svsk; | 276 | closesk = svsk; |
@@ -614,7 +282,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
614 | /* Should unregister with portmap, but you cannot | 282 | /* Should unregister with portmap, but you cannot |
615 | * unregister just one protocol... | 283 | * unregister just one protocol... |
616 | */ | 284 | */ |
617 | svc_close_socket(closesk); | 285 | svc_close_xprt(&closesk->sk_xprt); |
618 | else if (toclose) | 286 | else if (toclose) |
619 | return -ENOENT; | 287 | return -ENOENT; |
620 | return len; | 288 | return len; |
@@ -624,8 +292,7 @@ EXPORT_SYMBOL(svc_sock_names); | |||
624 | /* | 292 | /* |
625 | * Check input queue length | 293 | * Check input queue length |
626 | */ | 294 | */ |
627 | static int | 295 | static int svc_recv_available(struct svc_sock *svsk) |
628 | svc_recv_available(struct svc_sock *svsk) | ||
629 | { | 296 | { |
630 | struct socket *sock = svsk->sk_sock; | 297 | struct socket *sock = svsk->sk_sock; |
631 | int avail, err; | 298 | int avail, err; |
@@ -638,48 +305,31 @@ svc_recv_available(struct svc_sock *svsk) | |||
638 | /* | 305 | /* |
639 | * Generic recvfrom routine. | 306 | * Generic recvfrom routine. |
640 | */ | 307 | */ |
641 | static int | 308 | static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, |
642 | svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) | 309 | int buflen) |
643 | { | 310 | { |
644 | struct svc_sock *svsk = rqstp->rq_sock; | 311 | struct svc_sock *svsk = |
312 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
645 | struct msghdr msg = { | 313 | struct msghdr msg = { |
646 | .msg_flags = MSG_DONTWAIT, | 314 | .msg_flags = MSG_DONTWAIT, |
647 | }; | 315 | }; |
648 | struct sockaddr *sin; | ||
649 | int len; | 316 | int len; |
650 | 317 | ||
318 | rqstp->rq_xprt_hlen = 0; | ||
319 | |||
651 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, | 320 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, |
652 | msg.msg_flags); | 321 | msg.msg_flags); |
653 | 322 | ||
654 | /* sock_recvmsg doesn't fill in the name/namelen, so we must.. | ||
655 | */ | ||
656 | memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen); | ||
657 | rqstp->rq_addrlen = svsk->sk_remotelen; | ||
658 | |||
659 | /* Destination address in request is needed for binding the | ||
660 | * source address in RPC callbacks later. | ||
661 | */ | ||
662 | sin = (struct sockaddr *)&svsk->sk_local; | ||
663 | switch (sin->sa_family) { | ||
664 | case AF_INET: | ||
665 | rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; | ||
666 | break; | ||
667 | case AF_INET6: | ||
668 | rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; | ||
669 | break; | ||
670 | } | ||
671 | |||
672 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", | 323 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", |
673 | svsk, iov[0].iov_base, iov[0].iov_len, len); | 324 | svsk, iov[0].iov_base, iov[0].iov_len, len); |
674 | |||
675 | return len; | 325 | return len; |
676 | } | 326 | } |
677 | 327 | ||
678 | /* | 328 | /* |
679 | * Set socket snd and rcv buffer lengths | 329 | * Set socket snd and rcv buffer lengths |
680 | */ | 330 | */ |
681 | static inline void | 331 | static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, |
682 | svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | 332 | unsigned int rcv) |
683 | { | 333 | { |
684 | #if 0 | 334 | #if 0 |
685 | mm_segment_t oldfs; | 335 | mm_segment_t oldfs; |
@@ -704,16 +354,16 @@ svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | |||
704 | /* | 354 | /* |
705 | * INET callback when data has been received on the socket. | 355 | * INET callback when data has been received on the socket. |
706 | */ | 356 | */ |
707 | static void | 357 | static void svc_udp_data_ready(struct sock *sk, int count) |
708 | svc_udp_data_ready(struct sock *sk, int count) | ||
709 | { | 358 | { |
710 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 359 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
711 | 360 | ||
712 | if (svsk) { | 361 | if (svsk) { |
713 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", | 362 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", |
714 | svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); | 363 | svsk, sk, count, |
715 | set_bit(SK_DATA, &svsk->sk_flags); | 364 | test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); |
716 | svc_sock_enqueue(svsk); | 365 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
366 | svc_xprt_enqueue(&svsk->sk_xprt); | ||
717 | } | 367 | } |
718 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 368 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
719 | wake_up_interruptible(sk->sk_sleep); | 369 | wake_up_interruptible(sk->sk_sleep); |
@@ -722,15 +372,14 @@ svc_udp_data_ready(struct sock *sk, int count) | |||
722 | /* | 372 | /* |
723 | * INET callback when space is newly available on the socket. | 373 | * INET callback when space is newly available on the socket. |
724 | */ | 374 | */ |
725 | static void | 375 | static void svc_write_space(struct sock *sk) |
726 | svc_write_space(struct sock *sk) | ||
727 | { | 376 | { |
728 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); | 377 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); |
729 | 378 | ||
730 | if (svsk) { | 379 | if (svsk) { |
731 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", | 380 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", |
732 | svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); | 381 | svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); |
733 | svc_sock_enqueue(svsk); | 382 | svc_xprt_enqueue(&svsk->sk_xprt); |
734 | } | 383 | } |
735 | 384 | ||
736 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { | 385 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { |
@@ -740,10 +389,19 @@ svc_write_space(struct sock *sk) | |||
740 | } | 389 | } |
741 | } | 390 | } |
742 | 391 | ||
743 | static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, | 392 | /* |
744 | struct cmsghdr *cmh) | 393 | * Copy the UDP datagram's destination address to the rqstp structure. |
394 | * The 'destination' address in this case is the address to which the | ||
395 | * peer sent the datagram, i.e. our local address. For multihomed | ||
396 | * hosts, this can change from msg to msg. Note that only the IP | ||
397 | * address changes, the port number should remain the same. | ||
398 | */ | ||
399 | static void svc_udp_get_dest_address(struct svc_rqst *rqstp, | ||
400 | struct cmsghdr *cmh) | ||
745 | { | 401 | { |
746 | switch (rqstp->rq_sock->sk_sk->sk_family) { | 402 | struct svc_sock *svsk = |
403 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
404 | switch (svsk->sk_sk->sk_family) { | ||
747 | case AF_INET: { | 405 | case AF_INET: { |
748 | struct in_pktinfo *pki = CMSG_DATA(cmh); | 406 | struct in_pktinfo *pki = CMSG_DATA(cmh); |
749 | rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; | 407 | rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; |
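The comment added above explains why UDP needs the datagram's destination address: on a multihomed host the reply must be sent from the address the client actually targeted. That address arrives as an IP_PKTINFO control message; a userspace sketch of reading the same ancillary data with recvmsg() (socket setup and error handling kept minimal):

    #define _GNU_SOURCE           /* for struct in_pktinfo on glibc */
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_DGRAM, 0);
            int one = 1;
            struct sockaddr_in sin = { .sin_family = AF_INET,
                                       .sin_port = htons(5555),
                                       .sin_addr.s_addr = htonl(INADDR_ANY) };

            /* ask the stack to report each datagram's destination address */
            setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one));
            bind(fd, (struct sockaddr *)&sin, sizeof(sin));

            char data[2048], cbuf[CMSG_SPACE(sizeof(struct in_pktinfo))];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
            struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
                                  .msg_control = cbuf,
                                  .msg_controllen = sizeof(cbuf) };

            if (recvmsg(fd, &msg, 0) < 0)
                    return 1;

            for (struct cmsghdr *c = CMSG_FIRSTHDR(&msg); c;
                 c = CMSG_NXTHDR(&msg, c)) {
                    if (c->cmsg_level == IPPROTO_IP &&
                        c->cmsg_type == IP_PKTINFO) {
                            struct in_pktinfo pki;
                            memcpy(&pki, CMSG_DATA(c), sizeof(pki));
                            /* ipi_spec_dst is the local address to reply from,
                             * just as svc_udp_get_dest_address() records it */
                            printf("reply from %s\n", inet_ntoa(pki.ipi_spec_dst));
                    }
            }
            close(fd);
            return 0;
    }
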
@@ -760,11 +418,11 @@ static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, | |||
760 | /* | 418 | /* |
761 | * Receive a datagram from a UDP socket. | 419 | * Receive a datagram from a UDP socket. |
762 | */ | 420 | */ |
763 | static int | 421 | static int svc_udp_recvfrom(struct svc_rqst *rqstp) |
764 | svc_udp_recvfrom(struct svc_rqst *rqstp) | ||
765 | { | 422 | { |
766 | struct svc_sock *svsk = rqstp->rq_sock; | 423 | struct svc_sock *svsk = |
767 | struct svc_serv *serv = svsk->sk_server; | 424 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); |
425 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
768 | struct sk_buff *skb; | 426 | struct sk_buff *skb; |
769 | union { | 427 | union { |
770 | struct cmsghdr hdr; | 428 | struct cmsghdr hdr; |
@@ -779,7 +437,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
779 | .msg_flags = MSG_DONTWAIT, | 437 | .msg_flags = MSG_DONTWAIT, |
780 | }; | 438 | }; |
781 | 439 | ||
782 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | 440 | if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) |
783 | /* udp sockets need large rcvbuf as all pending | 441 | /* udp sockets need large rcvbuf as all pending |
784 | * requests are still in that buffer. sndbuf must | 442 | * requests are still in that buffer. sndbuf must |
785 | * also be large enough that there is enough space | 443 | * also be large enough that there is enough space |
@@ -792,17 +450,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
792 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, | 450 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, |
793 | (serv->sv_nrthreads+3) * serv->sv_max_mesg); | 451 | (serv->sv_nrthreads+3) * serv->sv_max_mesg); |
794 | 452 | ||
795 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 453 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
796 | svc_sock_received(svsk); | ||
797 | return svc_deferred_recv(rqstp); | ||
798 | } | ||
799 | |||
800 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
801 | svc_delete_socket(svsk); | ||
802 | return 0; | ||
803 | } | ||
804 | |||
805 | clear_bit(SK_DATA, &svsk->sk_flags); | ||
806 | skb = NULL; | 454 | skb = NULL; |
807 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, | 455 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, |
808 | 0, 0, MSG_PEEK | MSG_DONTWAIT); | 456 | 0, 0, MSG_PEEK | MSG_DONTWAIT); |
@@ -813,24 +461,27 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
813 | if (err != -EAGAIN) { | 461 | if (err != -EAGAIN) { |
814 | /* possibly an icmp error */ | 462 | /* possibly an icmp error */ |
815 | dprintk("svc: recvfrom returned error %d\n", -err); | 463 | dprintk("svc: recvfrom returned error %d\n", -err); |
816 | set_bit(SK_DATA, &svsk->sk_flags); | 464 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
817 | } | 465 | } |
818 | svc_sock_received(svsk); | 466 | svc_xprt_received(&svsk->sk_xprt); |
819 | return -EAGAIN; | 467 | return -EAGAIN; |
820 | } | 468 | } |
821 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); | 469 | len = svc_addr_len(svc_addr(rqstp)); |
470 | if (len < 0) | ||
471 | return len; | ||
472 | rqstp->rq_addrlen = len; | ||
822 | if (skb->tstamp.tv64 == 0) { | 473 | if (skb->tstamp.tv64 == 0) { |
823 | skb->tstamp = ktime_get_real(); | 474 | skb->tstamp = ktime_get_real(); |
824 | /* Don't enable netstamp, sunrpc doesn't | 475 | /* Don't enable netstamp, sunrpc doesn't |
825 | need that much accuracy */ | 476 | need that much accuracy */ |
826 | } | 477 | } |
827 | svsk->sk_sk->sk_stamp = skb->tstamp; | 478 | svsk->sk_sk->sk_stamp = skb->tstamp; |
828 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ | 479 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ |
829 | 480 | ||
830 | /* | 481 | /* |
831 | * Maybe more packets - kick another thread ASAP. | 482 | * Maybe more packets - kick another thread ASAP. |
832 | */ | 483 | */ |
833 | svc_sock_received(svsk); | 484 | svc_xprt_received(&svsk->sk_xprt); |
834 | 485 | ||
835 | len = skb->len - sizeof(struct udphdr); | 486 | len = skb->len - sizeof(struct udphdr); |
836 | rqstp->rq_arg.len = len; | 487 | rqstp->rq_arg.len = len; |
@@ -861,13 +512,14 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
861 | skb_free_datagram(svsk->sk_sk, skb); | 512 | skb_free_datagram(svsk->sk_sk, skb); |
862 | } else { | 513 | } else { |
863 | /* we can use it in-place */ | 514 | /* we can use it in-place */ |
864 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); | 515 | rqstp->rq_arg.head[0].iov_base = skb->data + |
516 | sizeof(struct udphdr); | ||
865 | rqstp->rq_arg.head[0].iov_len = len; | 517 | rqstp->rq_arg.head[0].iov_len = len; |
866 | if (skb_checksum_complete(skb)) { | 518 | if (skb_checksum_complete(skb)) { |
867 | skb_free_datagram(svsk->sk_sk, skb); | 519 | skb_free_datagram(svsk->sk_sk, skb); |
868 | return 0; | 520 | return 0; |
869 | } | 521 | } |
870 | rqstp->rq_skbuff = skb; | 522 | rqstp->rq_xprt_ctxt = skb; |
871 | } | 523 | } |
872 | 524 | ||
873 | rqstp->rq_arg.page_base = 0; | 525 | rqstp->rq_arg.page_base = 0; |
@@ -900,27 +552,81 @@ svc_udp_sendto(struct svc_rqst *rqstp) | |||
900 | return error; | 552 | return error; |
901 | } | 553 | } |
902 | 554 | ||
903 | static void | 555 | static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp) |
904 | svc_udp_init(struct svc_sock *svsk) | 556 | { |
557 | } | ||
558 | |||
559 | static int svc_udp_has_wspace(struct svc_xprt *xprt) | ||
560 | { | ||
561 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
562 | struct svc_serv *serv = xprt->xpt_server; | ||
563 | unsigned long required; | ||
564 | |||
565 | /* | ||
566 | * Set the SOCK_NOSPACE flag before checking the available | ||
567 | * sock space. | ||
568 | */ | ||
569 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
570 | required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; | ||
571 | if (required*2 > sock_wspace(svsk->sk_sk)) | ||
572 | return 0; | ||
573 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
574 | return 1; | ||
575 | } | ||
576 | |||
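The new xpo_has_wspace callbacks apply the same admission rule the old enqueue path used: take the bytes already reserved for in-flight replies, add one worst-case message, and require twice that amount of free send-buffer space before accepting more work (the TCP variant below additionally insists on the stream's minimum write space). A minimal userspace sketch of the arithmetic, with invented numbers standing in for xpt_reserved, sv_max_mesg and the socket's write space:

    #include <stdbool.h>
    #include <stdio.h>

    /* Sketch of the "enough room for a reply?" test.
     * reserved  - bytes already promised to in-flight replies (xpt_reserved)
     * max_mesg  - worst-case size of one more reply (sv_max_mesg)
     * wspace    - free space in the socket send buffer */
    static bool reply_would_fit(long reserved, long max_mesg, long wspace)
    {
            /* mirror the kernel test: required*2 > wspace means "not enough",
             * so invert it for a would-fit answer */
            return (reserved + max_mesg) * 2 <= wspace;
    }

    int main(void)
    {
            /* hypothetical numbers: 64 KB already reserved, 32 KB max message */
            printf("fits in 256 KB: %d\n", reply_would_fit(65536, 32768, 262144));
            printf("fits in 128 KB: %d\n", reply_would_fit(65536, 32768, 131072));
            return 0;
    }
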
577 | static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) | ||
578 | { | ||
579 | BUG(); | ||
580 | return NULL; | ||
581 | } | ||
582 | |||
583 | static struct svc_xprt *svc_udp_create(struct svc_serv *serv, | ||
584 | struct sockaddr *sa, int salen, | ||
585 | int flags) | ||
586 | { | ||
587 | return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); | ||
588 | } | ||
589 | |||
590 | static struct svc_xprt_ops svc_udp_ops = { | ||
591 | .xpo_create = svc_udp_create, | ||
592 | .xpo_recvfrom = svc_udp_recvfrom, | ||
593 | .xpo_sendto = svc_udp_sendto, | ||
594 | .xpo_release_rqst = svc_release_skb, | ||
595 | .xpo_detach = svc_sock_detach, | ||
596 | .xpo_free = svc_sock_free, | ||
597 | .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, | ||
598 | .xpo_has_wspace = svc_udp_has_wspace, | ||
599 | .xpo_accept = svc_udp_accept, | ||
600 | }; | ||
601 | |||
602 | static struct svc_xprt_class svc_udp_class = { | ||
603 | .xcl_name = "udp", | ||
604 | .xcl_owner = THIS_MODULE, | ||
605 | .xcl_ops = &svc_udp_ops, | ||
606 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, | ||
607 | }; | ||
608 | |||
609 | static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) | ||
905 | { | 610 | { |
906 | int one = 1; | 611 | int one = 1; |
907 | mm_segment_t oldfs; | 612 | mm_segment_t oldfs; |
908 | 613 | ||
614 | svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); | ||
615 | clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); | ||
909 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; | 616 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; |
910 | svsk->sk_sk->sk_write_space = svc_write_space; | 617 | svsk->sk_sk->sk_write_space = svc_write_space; |
911 | svsk->sk_recvfrom = svc_udp_recvfrom; | ||
912 | svsk->sk_sendto = svc_udp_sendto; | ||
913 | 618 | ||
914 | /* initialise setting must have enough space to | 619 | /* initialise setting must have enough space to |
915 | * receive and respond to one request. | 620 | * receive and respond to one request. |
916 | * svc_udp_recvfrom will re-adjust if necessary | 621 | * svc_udp_recvfrom will re-adjust if necessary |
917 | */ | 622 | */ |
918 | svc_sock_setbufsize(svsk->sk_sock, | 623 | svc_sock_setbufsize(svsk->sk_sock, |
919 | 3 * svsk->sk_server->sv_max_mesg, | 624 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, |
920 | 3 * svsk->sk_server->sv_max_mesg); | 625 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); |
921 | 626 | ||
922 | set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ | 627 | /* data might have come in before data_ready set up */ |
923 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 628 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
629 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); | ||
924 | 630 | ||
925 | oldfs = get_fs(); | 631 | oldfs = get_fs(); |
926 | set_fs(KERNEL_DS); | 632 | set_fs(KERNEL_DS); |
@@ -934,8 +640,7 @@ svc_udp_init(struct svc_sock *svsk) | |||
934 | * A data_ready event on a listening socket means there's a connection | 640 | * A data_ready event on a listening socket means there's a connection |
935 | * pending. Do not use state_change as a substitute for it. | 641 | * pending. Do not use state_change as a substitute for it. |
936 | */ | 642 | */ |
937 | static void | 643 | static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) |
938 | svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | ||
939 | { | 644 | { |
940 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 645 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
941 | 646 | ||
@@ -954,8 +659,8 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | |||
954 | */ | 659 | */ |
955 | if (sk->sk_state == TCP_LISTEN) { | 660 | if (sk->sk_state == TCP_LISTEN) { |
956 | if (svsk) { | 661 | if (svsk) { |
957 | set_bit(SK_CONN, &svsk->sk_flags); | 662 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
958 | svc_sock_enqueue(svsk); | 663 | svc_xprt_enqueue(&svsk->sk_xprt); |
959 | } else | 664 | } else |
960 | printk("svc: socket %p: no user data\n", sk); | 665 | printk("svc: socket %p: no user data\n", sk); |
961 | } | 666 | } |
@@ -967,8 +672,7 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | |||
967 | /* | 672 | /* |
968 | * A state change on a connected socket means it's dying or dead. | 673 | * A state change on a connected socket means it's dying or dead. |
969 | */ | 674 | */ |
970 | static void | 675 | static void svc_tcp_state_change(struct sock *sk) |
971 | svc_tcp_state_change(struct sock *sk) | ||
972 | { | 676 | { |
973 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 677 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
974 | 678 | ||
@@ -978,51 +682,36 @@ svc_tcp_state_change(struct sock *sk) | |||
978 | if (!svsk) | 682 | if (!svsk) |
979 | printk("svc: socket %p: no user data\n", sk); | 683 | printk("svc: socket %p: no user data\n", sk); |
980 | else { | 684 | else { |
981 | set_bit(SK_CLOSE, &svsk->sk_flags); | 685 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
982 | svc_sock_enqueue(svsk); | 686 | svc_xprt_enqueue(&svsk->sk_xprt); |
983 | } | 687 | } |
984 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 688 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
985 | wake_up_interruptible_all(sk->sk_sleep); | 689 | wake_up_interruptible_all(sk->sk_sleep); |
986 | } | 690 | } |
987 | 691 | ||
988 | static void | 692 | static void svc_tcp_data_ready(struct sock *sk, int count) |
989 | svc_tcp_data_ready(struct sock *sk, int count) | ||
990 | { | 693 | { |
991 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 694 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
992 | 695 | ||
993 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", | 696 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", |
994 | sk, sk->sk_user_data); | 697 | sk, sk->sk_user_data); |
995 | if (svsk) { | 698 | if (svsk) { |
996 | set_bit(SK_DATA, &svsk->sk_flags); | 699 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
997 | svc_sock_enqueue(svsk); | 700 | svc_xprt_enqueue(&svsk->sk_xprt); |
998 | } | 701 | } |
999 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 702 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
1000 | wake_up_interruptible(sk->sk_sleep); | 703 | wake_up_interruptible(sk->sk_sleep); |
1001 | } | 704 | } |
1002 | 705 | ||
1003 | static inline int svc_port_is_privileged(struct sockaddr *sin) | ||
1004 | { | ||
1005 | switch (sin->sa_family) { | ||
1006 | case AF_INET: | ||
1007 | return ntohs(((struct sockaddr_in *)sin)->sin_port) | ||
1008 | < PROT_SOCK; | ||
1009 | case AF_INET6: | ||
1010 | return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) | ||
1011 | < PROT_SOCK; | ||
1012 | default: | ||
1013 | return 0; | ||
1014 | } | ||
1015 | } | ||
1016 | |||
1017 | /* | 706 | /* |
1018 | * Accept a TCP connection | 707 | * Accept a TCP connection |
1019 | */ | 708 | */ |
1020 | static void | 709 | static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) |
1021 | svc_tcp_accept(struct svc_sock *svsk) | ||
1022 | { | 710 | { |
711 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
1023 | struct sockaddr_storage addr; | 712 | struct sockaddr_storage addr; |
1024 | struct sockaddr *sin = (struct sockaddr *) &addr; | 713 | struct sockaddr *sin = (struct sockaddr *) &addr; |
1025 | struct svc_serv *serv = svsk->sk_server; | 714 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; |
1026 | struct socket *sock = svsk->sk_sock; | 715 | struct socket *sock = svsk->sk_sock; |
1027 | struct socket *newsock; | 716 | struct socket *newsock; |
1028 | struct svc_sock *newsvsk; | 717 | struct svc_sock *newsvsk; |
@@ -1031,9 +720,9 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1031 | 720 | ||
1032 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); | 721 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); |
1033 | if (!sock) | 722 | if (!sock) |
1034 | return; | 723 | return NULL; |
1035 | 724 | ||
1036 | clear_bit(SK_CONN, &svsk->sk_flags); | 725 | clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
1037 | err = kernel_accept(sock, &newsock, O_NONBLOCK); | 726 | err = kernel_accept(sock, &newsock, O_NONBLOCK); |
1038 | if (err < 0) { | 727 | if (err < 0) { |
1039 | if (err == -ENOMEM) | 728 | if (err == -ENOMEM) |
@@ -1042,11 +731,9 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1042 | else if (err != -EAGAIN && net_ratelimit()) | 731 | else if (err != -EAGAIN && net_ratelimit()) |
1043 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", | 732 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", |
1044 | serv->sv_name, -err); | 733 | serv->sv_name, -err); |
1045 | return; | 734 | return NULL; |
1046 | } | 735 | } |
1047 | 736 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); | |
1048 | set_bit(SK_CONN, &svsk->sk_flags); | ||
1049 | svc_sock_enqueue(svsk); | ||
1050 | 737 | ||
1051 | err = kernel_getpeername(newsock, sin, &slen); | 738 | err = kernel_getpeername(newsock, sin, &slen); |
1052 | if (err < 0) { | 739 | if (err < 0) { |
@@ -1077,106 +764,42 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1077 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, | 764 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, |
1078 | (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) | 765 | (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) |
1079 | goto failed; | 766 | goto failed; |
1080 | memcpy(&newsvsk->sk_remote, sin, slen); | 767 | svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); |
1081 | newsvsk->sk_remotelen = slen; | ||
1082 | err = kernel_getsockname(newsock, sin, &slen); | 768 | err = kernel_getsockname(newsock, sin, &slen); |
1083 | if (unlikely(err < 0)) { | 769 | if (unlikely(err < 0)) { |
1084 | dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); | 770 | dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); |
1085 | slen = offsetof(struct sockaddr, sa_data); | 771 | slen = offsetof(struct sockaddr, sa_data); |
1086 | } | 772 | } |
1087 | memcpy(&newsvsk->sk_local, sin, slen); | 773 | svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); |
1088 | |||
1089 | svc_sock_received(newsvsk); | ||
1090 | |||
1091 | /* make sure that we don't have too many active connections. | ||
1092 | * If we have, something must be dropped. | ||
1093 | * | ||
1094 | * There's no point in trying to do random drop here for | ||
1095 | * DoS prevention. The NFS clients does 1 reconnect in 15 | ||
1096 | * seconds. An attacker can easily beat that. | ||
1097 | * | ||
1098 | * The only somewhat efficient mechanism would be if drop | ||
1099 | * old connections from the same IP first. But right now | ||
1100 | * we don't even record the client IP in svc_sock. | ||
1101 | */ | ||
1102 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | ||
1103 | struct svc_sock *svsk = NULL; | ||
1104 | spin_lock_bh(&serv->sv_lock); | ||
1105 | if (!list_empty(&serv->sv_tempsocks)) { | ||
1106 | if (net_ratelimit()) { | ||
1107 | /* Try to help the admin */ | ||
1108 | printk(KERN_NOTICE "%s: too many open TCP " | ||
1109 | "sockets, consider increasing the " | ||
1110 | "number of nfsd threads\n", | ||
1111 | serv->sv_name); | ||
1112 | printk(KERN_NOTICE | ||
1113 | "%s: last TCP connect from %s\n", | ||
1114 | serv->sv_name, __svc_print_addr(sin, | ||
1115 | buf, sizeof(buf))); | ||
1116 | } | ||
1117 | /* | ||
1118 | * Always select the oldest socket. It's not fair, | ||
1119 | * but so is life | ||
1120 | */ | ||
1121 | svsk = list_entry(serv->sv_tempsocks.prev, | ||
1122 | struct svc_sock, | ||
1123 | sk_list); | ||
1124 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1125 | atomic_inc(&svsk->sk_inuse); | ||
1126 | } | ||
1127 | spin_unlock_bh(&serv->sv_lock); | ||
1128 | |||
1129 | if (svsk) { | ||
1130 | svc_sock_enqueue(svsk); | ||
1131 | svc_sock_put(svsk); | ||
1132 | } | ||
1133 | |||
1134 | } | ||
1135 | 774 | ||
1136 | if (serv->sv_stats) | 775 | if (serv->sv_stats) |
1137 | serv->sv_stats->nettcpconn++; | 776 | serv->sv_stats->nettcpconn++; |
1138 | 777 | ||
1139 | return; | 778 | return &newsvsk->sk_xprt; |
1140 | 779 | ||
1141 | failed: | 780 | failed: |
1142 | sock_release(newsock); | 781 | sock_release(newsock); |
1143 | return; | 782 | return NULL; |
1144 | } | 783 | } |
1145 | 784 | ||
1146 | /* | 785 | /* |
1147 | * Receive data from a TCP socket. | 786 | * Receive data from a TCP socket. |
1148 | */ | 787 | */ |
1149 | static int | 788 | static int svc_tcp_recvfrom(struct svc_rqst *rqstp) |
1150 | svc_tcp_recvfrom(struct svc_rqst *rqstp) | ||
1151 | { | 789 | { |
1152 | struct svc_sock *svsk = rqstp->rq_sock; | 790 | struct svc_sock *svsk = |
1153 | struct svc_serv *serv = svsk->sk_server; | 791 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); |
792 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
1154 | int len; | 793 | int len; |
1155 | struct kvec *vec; | 794 | struct kvec *vec; |
1156 | int pnum, vlen; | 795 | int pnum, vlen; |
1157 | 796 | ||
1158 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", | 797 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", |
1159 | svsk, test_bit(SK_DATA, &svsk->sk_flags), | 798 | svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), |
1160 | test_bit(SK_CONN, &svsk->sk_flags), | 799 | test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), |
1161 | test_bit(SK_CLOSE, &svsk->sk_flags)); | 800 | test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); |
1162 | 801 | ||
1163 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 802 | if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) |
1164 | svc_sock_received(svsk); | ||
1165 | return svc_deferred_recv(rqstp); | ||
1166 | } | ||
1167 | |||
1168 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
1169 | svc_delete_socket(svsk); | ||
1170 | return 0; | ||
1171 | } | ||
1172 | |||
1173 | if (svsk->sk_sk->sk_state == TCP_LISTEN) { | ||
1174 | svc_tcp_accept(svsk); | ||
1175 | svc_sock_received(svsk); | ||
1176 | return 0; | ||
1177 | } | ||
1178 | |||
1179 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | ||
1180 | /* sndbuf needs to have room for one request | 803 | /* sndbuf needs to have room for one request |
1181 | * per thread, otherwise we can stall even when the | 804 | * per thread, otherwise we can stall even when the |
1182 | * network isn't a bottleneck. | 805 | * network isn't a bottleneck. |
@@ -1193,7 +816,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1193 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, | 816 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, |
1194 | 3 * serv->sv_max_mesg); | 817 | 3 * serv->sv_max_mesg); |
1195 | 818 | ||
1196 | clear_bit(SK_DATA, &svsk->sk_flags); | 819 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1197 | 820 | ||
1198 | /* Receive data. If we haven't got the record length yet, get | 821 | /* Receive data. If we haven't got the record length yet, get |
1199 | * the next four bytes. Otherwise try to gobble up as much as | 822 | * the next four bytes. Otherwise try to gobble up as much as |
@@ -1212,7 +835,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1212 | if (len < want) { | 835 | if (len < want) { |
1213 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", | 836 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", |
1214 | len, want); | 837 | len, want); |
1215 | svc_sock_received(svsk); | 838 | svc_xprt_received(&svsk->sk_xprt); |
1216 | return -EAGAIN; /* record header not complete */ | 839 | return -EAGAIN; /* record header not complete */ |
1217 | } | 840 | } |
1218 | 841 | ||
@@ -1248,11 +871,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1248 | if (len < svsk->sk_reclen) { | 871 | if (len < svsk->sk_reclen) { |
1249 | dprintk("svc: incomplete TCP record (%d of %d)\n", | 872 | dprintk("svc: incomplete TCP record (%d of %d)\n", |
1250 | len, svsk->sk_reclen); | 873 | len, svsk->sk_reclen); |
1251 | svc_sock_received(svsk); | 874 | svc_xprt_received(&svsk->sk_xprt); |
1252 | return -EAGAIN; /* record not complete */ | 875 | return -EAGAIN; /* record not complete */ |
1253 | } | 876 | } |
1254 | len = svsk->sk_reclen; | 877 | len = svsk->sk_reclen; |
1255 | set_bit(SK_DATA, &svsk->sk_flags); | 878 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1256 | 879 | ||
1257 | vec = rqstp->rq_vec; | 880 | vec = rqstp->rq_vec; |
1258 | vec[0] = rqstp->rq_arg.head[0]; | 881 | vec[0] = rqstp->rq_arg.head[0]; |
@@ -1281,30 +904,31 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1281 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; | 904 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; |
1282 | } | 905 | } |
1283 | 906 | ||
1284 | rqstp->rq_skbuff = NULL; | 907 | rqstp->rq_xprt_ctxt = NULL; |
1285 | rqstp->rq_prot = IPPROTO_TCP; | 908 | rqstp->rq_prot = IPPROTO_TCP; |
1286 | 909 | ||
1287 | /* Reset TCP read info */ | 910 | /* Reset TCP read info */ |
1288 | svsk->sk_reclen = 0; | 911 | svsk->sk_reclen = 0; |
1289 | svsk->sk_tcplen = 0; | 912 | svsk->sk_tcplen = 0; |
1290 | 913 | ||
1291 | svc_sock_received(svsk); | 914 | svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); |
915 | svc_xprt_received(&svsk->sk_xprt); | ||
1292 | if (serv->sv_stats) | 916 | if (serv->sv_stats) |
1293 | serv->sv_stats->nettcpcnt++; | 917 | serv->sv_stats->nettcpcnt++; |
1294 | 918 | ||
1295 | return len; | 919 | return len; |
1296 | 920 | ||
1297 | err_delete: | 921 | err_delete: |
1298 | svc_delete_socket(svsk); | 922 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
1299 | return -EAGAIN; | 923 | return -EAGAIN; |
1300 | 924 | ||
1301 | error: | 925 | error: |
1302 | if (len == -EAGAIN) { | 926 | if (len == -EAGAIN) { |
1303 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); | 927 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); |
1304 | svc_sock_received(svsk); | 928 | svc_xprt_received(&svsk->sk_xprt); |
1305 | } else { | 929 | } else { |
1306 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", | 930 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", |
1307 | svsk->sk_server->sv_name, -len); | 931 | svsk->sk_xprt.xpt_server->sv_name, -len); |
1308 | goto err_delete; | 932 | goto err_delete; |
1309 | } | 933 | } |
1310 | 934 | ||
@@ -1314,8 +938,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1314 | /* | 938 | /* |
1315 | * Send out data on TCP socket. | 939 | * Send out data on TCP socket. |
1316 | */ | 940 | */ |
1317 | static int | 941 | static int svc_tcp_sendto(struct svc_rqst *rqstp) |
1318 | svc_tcp_sendto(struct svc_rqst *rqstp) | ||
1319 | { | 942 | { |
1320 | struct xdr_buf *xbufp = &rqstp->rq_res; | 943 | struct xdr_buf *xbufp = &rqstp->rq_res; |
1321 | int sent; | 944 | int sent; |
@@ -1328,35 +951,109 @@ svc_tcp_sendto(struct svc_rqst *rqstp) | |||
1328 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); | 951 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); |
1329 | memcpy(xbufp->head[0].iov_base, &reclen, 4); | 952 | memcpy(xbufp->head[0].iov_base, &reclen, 4); |
1330 | 953 | ||
1331 | if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) | 954 | if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags)) |
1332 | return -ENOTCONN; | 955 | return -ENOTCONN; |
1333 | 956 | ||
1334 | sent = svc_sendto(rqstp, &rqstp->rq_res); | 957 | sent = svc_sendto(rqstp, &rqstp->rq_res); |
1335 | if (sent != xbufp->len) { | 958 | if (sent != xbufp->len) { |
1336 | printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", | 959 | printk(KERN_NOTICE |
1337 | rqstp->rq_sock->sk_server->sv_name, | 960 | "rpc-srv/tcp: %s: %s %d when sending %d bytes " |
961 | "- shutting down socket\n", | ||
962 | rqstp->rq_xprt->xpt_server->sv_name, | ||
1338 | (sent<0)?"got error":"sent only", | 963 | (sent<0)?"got error":"sent only", |
1339 | sent, xbufp->len); | 964 | sent, xbufp->len); |
1340 | set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); | 965 | set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags); |
1341 | svc_sock_enqueue(rqstp->rq_sock); | 966 | svc_xprt_enqueue(rqstp->rq_xprt); |
1342 | sent = -EAGAIN; | 967 | sent = -EAGAIN; |
1343 | } | 968 | } |
1344 | return sent; | 969 | return sent; |
1345 | } | 970 | } |
1346 | 971 | ||
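svc_tcp_sendto() above prepends the RPC-over-TCP record marker: a 4-byte big-endian word whose top bit marks the last fragment and whose low 31 bits give the fragment length, hence the 0x80000000 | (len - 4), since the marker itself is not counted. A standalone sketch of encoding and decoding that marker:

    #include <arpa/inet.h>   /* htonl/ntohl */
    #include <stdint.h>
    #include <stdio.h>

    #define LAST_FRAGMENT 0x80000000u

    /* Build the 4-byte record marker for a buffer that already starts with
     * 4 reserved bytes, as svc_tcp_prep_reply_hdr() arranges. */
    static uint32_t rec_marker(uint32_t total_len_including_marker)
    {
            return htonl(LAST_FRAGMENT | (total_len_including_marker - 4));
    }

    static void decode(uint32_t wire)
    {
            uint32_t host = ntohl(wire);
            printf("last=%u len=%u\n",
                   (host & LAST_FRAGMENT) ? 1 : 0,
                   host & ~LAST_FRAGMENT);
    }

    int main(void)
    {
            /* a 104-byte reply: 4-byte marker + 100 bytes of RPC data */
            uint32_t m = rec_marker(104);
            decode(m);            /* prints: last=1 len=100 */
            return 0;
    }
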
1347 | static void | 972 | /* |
1348 | svc_tcp_init(struct svc_sock *svsk) | 973 | * Setup response header. TCP has a 4B record length field. |
974 | */ | ||
975 | static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) | ||
976 | { | ||
977 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
978 | |||
979 | /* tcp needs a space for the record length... */ | ||
980 | svc_putnl(resv, 0); | ||
981 | } | ||
982 | |||
983 | static int svc_tcp_has_wspace(struct svc_xprt *xprt) | ||
984 | { | ||
985 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
986 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
987 | int required; | ||
988 | int wspace; | ||
989 | |||
990 | /* | ||
991 | * Set the SOCK_NOSPACE flag before checking the available | ||
992 | * sock space. | ||
993 | */ | ||
994 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
995 | required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; | ||
996 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
997 | |||
998 | if (wspace < sk_stream_min_wspace(svsk->sk_sk)) | ||
999 | return 0; | ||
1000 | if (required * 2 > wspace) | ||
1001 | return 0; | ||
1002 | |||
1003 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
1004 | return 1; | ||
1005 | } | ||
1006 | |||
1007 | static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, | ||
1008 | struct sockaddr *sa, int salen, | ||
1009 | int flags) | ||
1010 | { | ||
1011 | return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); | ||
1012 | } | ||
1013 | |||
1014 | static struct svc_xprt_ops svc_tcp_ops = { | ||
1015 | .xpo_create = svc_tcp_create, | ||
1016 | .xpo_recvfrom = svc_tcp_recvfrom, | ||
1017 | .xpo_sendto = svc_tcp_sendto, | ||
1018 | .xpo_release_rqst = svc_release_skb, | ||
1019 | .xpo_detach = svc_sock_detach, | ||
1020 | .xpo_free = svc_sock_free, | ||
1021 | .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, | ||
1022 | .xpo_has_wspace = svc_tcp_has_wspace, | ||
1023 | .xpo_accept = svc_tcp_accept, | ||
1024 | }; | ||
1025 | |||
1026 | static struct svc_xprt_class svc_tcp_class = { | ||
1027 | .xcl_name = "tcp", | ||
1028 | .xcl_owner = THIS_MODULE, | ||
1029 | .xcl_ops = &svc_tcp_ops, | ||
1030 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, | ||
1031 | }; | ||
1032 | |||
1033 | void svc_init_xprt_sock(void) | ||
1034 | { | ||
1035 | svc_reg_xprt_class(&svc_tcp_class); | ||
1036 | svc_reg_xprt_class(&svc_udp_class); | ||
1037 | } | ||
1038 | |||
1039 | void svc_cleanup_xprt_sock(void) | ||
1040 | { | ||
1041 | svc_unreg_xprt_class(&svc_tcp_class); | ||
1042 | svc_unreg_xprt_class(&svc_udp_class); | ||
1043 | } | ||
1044 | |||
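The two svc_xprt_class definitions above and the svc_init_xprt_sock()/svc_cleanup_xprt_sock() pair capture the new architecture: each transport supplies an ops vector (create, accept, recvfrom, sendto, ...) and registers a named class, so the generic server code never calls socket functions directly. A userspace sketch of the same vtable-and-registry idea (the names and the registry are invented; this is not the kernel API):

    #include <stdio.h>
    #include <string.h>

    struct xprt_ops {
            int (*recvfrom)(void *xprt);
            int (*sendto)(void *xprt, const char *buf, int len);
    };

    struct xprt_class {
            const char *name;
            const struct xprt_ops *ops;
            int max_payload;
    };

    /* toy registry standing in for svc_reg_xprt_class()/svc_unreg_xprt_class() */
    static const struct xprt_class *registry[4];

    static void reg_class(const struct xprt_class *c)
    {
            for (int i = 0; i < 4; i++)
                    if (!registry[i]) { registry[i] = c; return; }
    }

    static const struct xprt_class *find_class(const char *name)
    {
            for (int i = 0; i < 4; i++)
                    if (registry[i] && !strcmp(registry[i]->name, name))
                            return registry[i];
            return NULL;
    }

    static int fake_tcp_recvfrom(void *xprt) { (void)xprt; return 0; }
    static int fake_tcp_sendto(void *xprt, const char *buf, int len)
    { (void)xprt; (void)buf; return len; }

    static const struct xprt_ops tcp_ops = {
            .recvfrom = fake_tcp_recvfrom,
            .sendto   = fake_tcp_sendto,
    };
    static const struct xprt_class tcp_class = {
            .name = "tcp", .ops = &tcp_ops, .max_payload = 1 << 20,
    };

    int main(void)
    {
            reg_class(&tcp_class);
            const struct xprt_class *c = find_class("tcp");
            if (c)
                    printf("sent %d bytes via %s\n",
                           c->ops->sendto(NULL, "hi", 2), c->name);
            return 0;
    }
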
1045 | static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) | ||
1349 | { | 1046 | { |
1350 | struct sock *sk = svsk->sk_sk; | 1047 | struct sock *sk = svsk->sk_sk; |
1351 | struct tcp_sock *tp = tcp_sk(sk); | 1048 | struct tcp_sock *tp = tcp_sk(sk); |
1352 | 1049 | ||
1353 | svsk->sk_recvfrom = svc_tcp_recvfrom; | 1050 | svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); |
1354 | svsk->sk_sendto = svc_tcp_sendto; | 1051 | set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); |
1355 | |||
1356 | if (sk->sk_state == TCP_LISTEN) { | 1052 | if (sk->sk_state == TCP_LISTEN) { |
1357 | dprintk("setting up TCP socket for listening\n"); | 1053 | dprintk("setting up TCP socket for listening\n"); |
1054 | set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); | ||
1358 | sk->sk_data_ready = svc_tcp_listen_data_ready; | 1055 | sk->sk_data_ready = svc_tcp_listen_data_ready; |
1359 | set_bit(SK_CONN, &svsk->sk_flags); | 1056 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
1360 | } else { | 1057 | } else { |
1361 | dprintk("setting up TCP socket for reading\n"); | 1058 | dprintk("setting up TCP socket for reading\n"); |
1362 | sk->sk_state_change = svc_tcp_state_change; | 1059 | sk->sk_state_change = svc_tcp_state_change; |
@@ -1373,18 +1070,17 @@ svc_tcp_init(struct svc_sock *svsk) | |||
1373 | * svc_tcp_recvfrom will re-adjust if necessary | 1070 | * svc_tcp_recvfrom will re-adjust if necessary |
1374 | */ | 1071 | */ |
1375 | svc_sock_setbufsize(svsk->sk_sock, | 1072 | svc_sock_setbufsize(svsk->sk_sock, |
1376 | 3 * svsk->sk_server->sv_max_mesg, | 1073 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, |
1377 | 3 * svsk->sk_server->sv_max_mesg); | 1074 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); |
1378 | 1075 | ||
1379 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1076 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1380 | set_bit(SK_DATA, &svsk->sk_flags); | 1077 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1381 | if (sk->sk_state != TCP_ESTABLISHED) | 1078 | if (sk->sk_state != TCP_ESTABLISHED) |
1382 | set_bit(SK_CLOSE, &svsk->sk_flags); | 1079 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
1383 | } | 1080 | } |
1384 | } | 1081 | } |
1385 | 1082 | ||
1386 | void | 1083 | void svc_sock_update_bufs(struct svc_serv *serv) |
1387 | svc_sock_update_bufs(struct svc_serv *serv) | ||
1388 | { | 1084 | { |
1389 | /* | 1085 | /* |
1390 | * The number of server threads has changed. Update | 1086 | * The number of server threads has changed. Update |
@@ -1395,232 +1091,18 @@ svc_sock_update_bufs(struct svc_serv *serv) | |||
1395 | spin_lock_bh(&serv->sv_lock); | 1091 | spin_lock_bh(&serv->sv_lock); |
1396 | list_for_each(le, &serv->sv_permsocks) { | 1092 | list_for_each(le, &serv->sv_permsocks) { |
1397 | struct svc_sock *svsk = | 1093 | struct svc_sock *svsk = |
1398 | list_entry(le, struct svc_sock, sk_list); | 1094 | list_entry(le, struct svc_sock, sk_xprt.xpt_list); |
1399 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1095 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1400 | } | 1096 | } |
1401 | list_for_each(le, &serv->sv_tempsocks) { | 1097 | list_for_each(le, &serv->sv_tempsocks) { |
1402 | struct svc_sock *svsk = | 1098 | struct svc_sock *svsk = |
1403 | list_entry(le, struct svc_sock, sk_list); | 1099 | list_entry(le, struct svc_sock, sk_xprt.xpt_list); |
1404 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1100 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1405 | } | 1101 | } |
1406 | spin_unlock_bh(&serv->sv_lock); | 1102 | spin_unlock_bh(&serv->sv_lock); |
1407 | } | 1103 | } |
1408 | 1104 | ||
1409 | /* | 1105 | /* |
1410 | * Receive the next request on any socket. This code is carefully | ||
1411 | * organised not to touch any cachelines in the shared svc_serv | ||
1412 | * structure, only cachelines in the local svc_pool. | ||
1413 | */ | ||
1414 | int | ||
1415 | svc_recv(struct svc_rqst *rqstp, long timeout) | ||
1416 | { | ||
1417 | struct svc_sock *svsk = NULL; | ||
1418 | struct svc_serv *serv = rqstp->rq_server; | ||
1419 | struct svc_pool *pool = rqstp->rq_pool; | ||
1420 | int len, i; | ||
1421 | int pages; | ||
1422 | struct xdr_buf *arg; | ||
1423 | DECLARE_WAITQUEUE(wait, current); | ||
1424 | |||
1425 | dprintk("svc: server %p waiting for data (to = %ld)\n", | ||
1426 | rqstp, timeout); | ||
1427 | |||
1428 | if (rqstp->rq_sock) | ||
1429 | printk(KERN_ERR | ||
1430 | "svc_recv: service %p, socket not NULL!\n", | ||
1431 | rqstp); | ||
1432 | if (waitqueue_active(&rqstp->rq_wait)) | ||
1433 | printk(KERN_ERR | ||
1434 | "svc_recv: service %p, wait queue active!\n", | ||
1435 | rqstp); | ||
1436 | |||
1437 | |||
1438 | /* now allocate needed pages. If we get a failure, sleep briefly */ | ||
1439 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | ||
1440 | for (i=0; i < pages ; i++) | ||
1441 | while (rqstp->rq_pages[i] == NULL) { | ||
1442 | struct page *p = alloc_page(GFP_KERNEL); | ||
1443 | if (!p) | ||
1444 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | ||
1445 | rqstp->rq_pages[i] = p; | ||
1446 | } | ||
1447 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | ||
1448 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
1449 | |||
1450 | /* Make arg->head point to first page and arg->pages point to rest */ | ||
1451 | arg = &rqstp->rq_arg; | ||
1452 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | ||
1453 | arg->head[0].iov_len = PAGE_SIZE; | ||
1454 | arg->pages = rqstp->rq_pages + 1; | ||
1455 | arg->page_base = 0; | ||
1456 | /* save at least one page for response */ | ||
1457 | arg->page_len = (pages-2)*PAGE_SIZE; | ||
1458 | arg->len = (pages-1)*PAGE_SIZE; | ||
1459 | arg->tail[0].iov_len = 0; | ||
1460 | |||
1461 | try_to_freeze(); | ||
1462 | cond_resched(); | ||
1463 | if (signalled()) | ||
1464 | return -EINTR; | ||
1465 | |||
1466 | spin_lock_bh(&pool->sp_lock); | ||
1467 | if ((svsk = svc_sock_dequeue(pool)) != NULL) { | ||
1468 | rqstp->rq_sock = svsk; | ||
1469 | atomic_inc(&svsk->sk_inuse); | ||
1470 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
1471 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | ||
1472 | } else { | ||
1473 | /* No data pending. Go to sleep */ | ||
1474 | svc_thread_enqueue(pool, rqstp); | ||
1475 | |||
1476 | /* | ||
1477 | * We have to be able to interrupt this wait | ||
1478 | * to bring down the daemons ... | ||
1479 | */ | ||
1480 | set_current_state(TASK_INTERRUPTIBLE); | ||
1481 | add_wait_queue(&rqstp->rq_wait, &wait); | ||
1482 | spin_unlock_bh(&pool->sp_lock); | ||
1483 | |||
1484 | schedule_timeout(timeout); | ||
1485 | |||
1486 | try_to_freeze(); | ||
1487 | |||
1488 | spin_lock_bh(&pool->sp_lock); | ||
1489 | remove_wait_queue(&rqstp->rq_wait, &wait); | ||
1490 | |||
1491 | if (!(svsk = rqstp->rq_sock)) { | ||
1492 | svc_thread_dequeue(pool, rqstp); | ||
1493 | spin_unlock_bh(&pool->sp_lock); | ||
1494 | dprintk("svc: server %p, no data yet\n", rqstp); | ||
1495 | return signalled()? -EINTR : -EAGAIN; | ||
1496 | } | ||
1497 | } | ||
1498 | spin_unlock_bh(&pool->sp_lock); | ||
1499 | |||
1500 | dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", | ||
1501 | rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); | ||
1502 | len = svsk->sk_recvfrom(rqstp); | ||
1503 | dprintk("svc: got len=%d\n", len); | ||
1504 | |||
1505 | /* No data, incomplete (TCP) read, or accept() */ | ||
1506 | if (len == 0 || len == -EAGAIN) { | ||
1507 | rqstp->rq_res.len = 0; | ||
1508 | svc_sock_release(rqstp); | ||
1509 | return -EAGAIN; | ||
1510 | } | ||
1511 | svsk->sk_lastrecv = get_seconds(); | ||
1512 | clear_bit(SK_OLD, &svsk->sk_flags); | ||
1513 | |||
1514 | rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); | ||
1515 | rqstp->rq_chandle.defer = svc_defer; | ||
1516 | |||
1517 | if (serv->sv_stats) | ||
1518 | serv->sv_stats->netcnt++; | ||
1519 | return len; | ||
1520 | } | ||
1521 | |||
1522 | /* | ||
1523 | * Drop request | ||
1524 | */ | ||
1525 | void | ||
1526 | svc_drop(struct svc_rqst *rqstp) | ||
1527 | { | ||
1528 | dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); | ||
1529 | svc_sock_release(rqstp); | ||
1530 | } | ||
1531 | |||
1532 | /* | ||
1533 | * Return reply to client. | ||
1534 | */ | ||
1535 | int | ||
1536 | svc_send(struct svc_rqst *rqstp) | ||
1537 | { | ||
1538 | struct svc_sock *svsk; | ||
1539 | int len; | ||
1540 | struct xdr_buf *xb; | ||
1541 | |||
1542 | if ((svsk = rqstp->rq_sock) == NULL) { | ||
1543 | printk(KERN_WARNING "NULL socket pointer in %s:%d\n", | ||
1544 | __FILE__, __LINE__); | ||
1545 | return -EFAULT; | ||
1546 | } | ||
1547 | |||
1548 | /* release the receive skb before sending the reply */ | ||
1549 | svc_release_skb(rqstp); | ||
1550 | |||
1551 | /* calculate over-all length */ | ||
1552 | xb = & rqstp->rq_res; | ||
1553 | xb->len = xb->head[0].iov_len + | ||
1554 | xb->page_len + | ||
1555 | xb->tail[0].iov_len; | ||
1556 | |||
1557 | /* Grab svsk->sk_mutex to serialize outgoing data. */ | ||
1558 | mutex_lock(&svsk->sk_mutex); | ||
1559 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
1560 | len = -ENOTCONN; | ||
1561 | else | ||
1562 | len = svsk->sk_sendto(rqstp); | ||
1563 | mutex_unlock(&svsk->sk_mutex); | ||
1564 | svc_sock_release(rqstp); | ||
1565 | |||
1566 | if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) | ||
1567 | return 0; | ||
1568 | return len; | ||
1569 | } | ||
1570 | |||
1571 | /* | ||
1572 | * Timer function to close old temporary sockets, using | ||
1573 | * a mark-and-sweep algorithm. | ||
1574 | */ | ||
1575 | static void | ||
1576 | svc_age_temp_sockets(unsigned long closure) | ||
1577 | { | ||
1578 | struct svc_serv *serv = (struct svc_serv *)closure; | ||
1579 | struct svc_sock *svsk; | ||
1580 | struct list_head *le, *next; | ||
1581 | LIST_HEAD(to_be_aged); | ||
1582 | |||
1583 | dprintk("svc_age_temp_sockets\n"); | ||
1584 | |||
1585 | if (!spin_trylock_bh(&serv->sv_lock)) { | ||
1586 | /* busy, try again 1 sec later */ | ||
1587 | dprintk("svc_age_temp_sockets: busy\n"); | ||
1588 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | ||
1589 | return; | ||
1590 | } | ||
1591 | |||
1592 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | ||
1593 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
1594 | |||
1595 | if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) | ||
1596 | continue; | ||
1597 | if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags)) | ||
1598 | continue; | ||
1599 | atomic_inc(&svsk->sk_inuse); | ||
1600 | list_move(le, &to_be_aged); | ||
1601 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1602 | set_bit(SK_DETACHED, &svsk->sk_flags); | ||
1603 | } | ||
1604 | spin_unlock_bh(&serv->sv_lock); | ||
1605 | |||
1606 | while (!list_empty(&to_be_aged)) { | ||
1607 | le = to_be_aged.next; | ||
1608 | /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */ | ||
1609 | list_del_init(le); | ||
1610 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
1611 | |||
1612 | dprintk("queuing svsk %p for closing, %lu seconds old\n", | ||
1613 | svsk, get_seconds() - svsk->sk_lastrecv); | ||
1614 | |||
1615 | /* a thread will dequeue and close it soon */ | ||
1616 | svc_sock_enqueue(svsk); | ||
1617 | svc_sock_put(svsk); | ||
1618 | } | ||
1619 | |||
1620 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | ||
1621 | } | ||
1622 | |||
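svc_age_temp_sockets(), removed above along with the rest of the transport-independent code, closes idle temporary connections with a mark-and-sweep scheme: the timer sets an "old" mark on every temp socket, normal receive traffic clears it again (the clear_bit(SK_OLD, ...) in the removed svc_recv), and any socket still marked on the next timer pass is queued for closing. A toy illustration of the idea, with invented names:

    #include <stdbool.h>
    #include <stdio.h>

    struct conn { const char *name; bool old; bool closed; };

    /* called from the receive path: this connection is clearly alive */
    static void saw_traffic(struct conn *c) { c->old = false; }

    /* called from the periodic timer: close anything still marked old,
     * then mark everything for the next round (mirrors SK_OLD handling) */
    static void age_pass(struct conn *conns, int n)
    {
            for (int i = 0; i < n; i++) {
                    if (conns[i].closed)
                            continue;
                    if (conns[i].old) {
                            conns[i].closed = true;
                            printf("closing idle connection %s\n", conns[i].name);
                    } else {
                            conns[i].old = true;
                    }
            }
    }

    int main(void)
    {
            struct conn c[2] = { { "a", false, false }, { "b", false, false } };

            age_pass(c, 2);      /* both get marked old */
            saw_traffic(&c[0]);  /* "a" is still in use */
            age_pass(c, 2);      /* "b" is still marked: it gets closed */
            return 0;
    }
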
1623 | /* | ||
1624 | * Initialize socket for RPC use and create svc_sock struct | 1106 | * Initialize socket for RPC use and create svc_sock struct |
1625 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. | 1107 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. |
1626 | */ | 1108 | */ |
@@ -1631,7 +1113,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1631 | struct svc_sock *svsk; | 1113 | struct svc_sock *svsk; |
1632 | struct sock *inet; | 1114 | struct sock *inet; |
1633 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); | 1115 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); |
1634 | int is_temporary = flags & SVC_SOCK_TEMPORARY; | ||
1635 | 1116 | ||
1636 | dprintk("svc: svc_setup_socket %p\n", sock); | 1117 | dprintk("svc: svc_setup_socket %p\n", sock); |
1637 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { | 1118 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { |
@@ -1651,44 +1132,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1651 | return NULL; | 1132 | return NULL; |
1652 | } | 1133 | } |
1653 | 1134 | ||
1654 | set_bit(SK_BUSY, &svsk->sk_flags); | ||
1655 | inet->sk_user_data = svsk; | 1135 | inet->sk_user_data = svsk; |
1656 | svsk->sk_sock = sock; | 1136 | svsk->sk_sock = sock; |
1657 | svsk->sk_sk = inet; | 1137 | svsk->sk_sk = inet; |
1658 | svsk->sk_ostate = inet->sk_state_change; | 1138 | svsk->sk_ostate = inet->sk_state_change; |
1659 | svsk->sk_odata = inet->sk_data_ready; | 1139 | svsk->sk_odata = inet->sk_data_ready; |
1660 | svsk->sk_owspace = inet->sk_write_space; | 1140 | svsk->sk_owspace = inet->sk_write_space; |
1661 | svsk->sk_server = serv; | ||
1662 | atomic_set(&svsk->sk_inuse, 1); | ||
1663 | svsk->sk_lastrecv = get_seconds(); | ||
1664 | spin_lock_init(&svsk->sk_lock); | ||
1665 | INIT_LIST_HEAD(&svsk->sk_deferred); | ||
1666 | INIT_LIST_HEAD(&svsk->sk_ready); | ||
1667 | mutex_init(&svsk->sk_mutex); | ||
1668 | 1141 | ||
1669 | /* Initialize the socket */ | 1142 | /* Initialize the socket */ |
1670 | if (sock->type == SOCK_DGRAM) | 1143 | if (sock->type == SOCK_DGRAM) |
1671 | svc_udp_init(svsk); | 1144 | svc_udp_init(svsk, serv); |
1672 | else | 1145 | else |
1673 | svc_tcp_init(svsk); | 1146 | svc_tcp_init(svsk, serv); |
1674 | |||
1675 | spin_lock_bh(&serv->sv_lock); | ||
1676 | if (is_temporary) { | ||
1677 | set_bit(SK_TEMP, &svsk->sk_flags); | ||
1678 | list_add(&svsk->sk_list, &serv->sv_tempsocks); | ||
1679 | serv->sv_tmpcnt++; | ||
1680 | if (serv->sv_temptimer.function == NULL) { | ||
1681 | /* setup timer to age temp sockets */ | ||
1682 | setup_timer(&serv->sv_temptimer, svc_age_temp_sockets, | ||
1683 | (unsigned long)serv); | ||
1684 | mod_timer(&serv->sv_temptimer, | ||
1685 | jiffies + svc_conn_age_period * HZ); | ||
1686 | } | ||
1687 | } else { | ||
1688 | clear_bit(SK_TEMP, &svsk->sk_flags); | ||
1689 | list_add(&svsk->sk_list, &serv->sv_permsocks); | ||
1690 | } | ||
1691 | spin_unlock_bh(&serv->sv_lock); | ||
1692 | 1147 | ||
1693 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", | 1148 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", |
1694 | svsk, svsk->sk_sk); | 1149 | svsk, svsk->sk_sk); |
@@ -1717,7 +1172,16 @@ int svc_addsock(struct svc_serv *serv, | |||
1717 | else { | 1172 | else { |
1718 | svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); | 1173 | svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); |
1719 | if (svsk) { | 1174 | if (svsk) { |
1720 | svc_sock_received(svsk); | 1175 | struct sockaddr_storage addr; |
1176 | struct sockaddr *sin = (struct sockaddr *)&addr; | ||
1177 | int salen; | ||
1178 | if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) | ||
1179 | svc_xprt_set_local(&svsk->sk_xprt, sin, salen); | ||
1180 | clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags); | ||
1181 | spin_lock_bh(&serv->sv_lock); | ||
1182 | list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks); | ||
1183 | spin_unlock_bh(&serv->sv_lock); | ||
1184 | svc_xprt_received(&svsk->sk_xprt); | ||
1721 | err = 0; | 1185 | err = 0; |
1722 | } | 1186 | } |
1723 | } | 1187 | } |
@@ -1733,14 +1197,19 @@ EXPORT_SYMBOL_GPL(svc_addsock); | |||
1733 | /* | 1197 | /* |
1734 | * Create socket for RPC service. | 1198 | * Create socket for RPC service. |
1735 | */ | 1199 | */ |
1736 | static int svc_create_socket(struct svc_serv *serv, int protocol, | 1200 | static struct svc_xprt *svc_create_socket(struct svc_serv *serv, |
1737 | struct sockaddr *sin, int len, int flags) | 1201 | int protocol, |
1202 | struct sockaddr *sin, int len, | ||
1203 | int flags) | ||
1738 | { | 1204 | { |
1739 | struct svc_sock *svsk; | 1205 | struct svc_sock *svsk; |
1740 | struct socket *sock; | 1206 | struct socket *sock; |
1741 | int error; | 1207 | int error; |
1742 | int type; | 1208 | int type; |
1743 | char buf[RPC_MAX_ADDRBUFLEN]; | 1209 | char buf[RPC_MAX_ADDRBUFLEN]; |
1210 | struct sockaddr_storage addr; | ||
1211 | struct sockaddr *newsin = (struct sockaddr *)&addr; | ||
1212 | int newlen; | ||
1744 | 1213 | ||
1745 | dprintk("svc: svc_create_socket(%s, %d, %s)\n", | 1214 | dprintk("svc: svc_create_socket(%s, %d, %s)\n", |
1746 | serv->sv_program->pg_name, protocol, | 1215 | serv->sv_program->pg_name, protocol, |
@@ -1749,13 +1218,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, | |||
1749 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { | 1218 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { |
1750 | printk(KERN_WARNING "svc: only UDP and TCP " | 1219 | printk(KERN_WARNING "svc: only UDP and TCP " |
1751 | "sockets supported\n"); | 1220 | "sockets supported\n"); |
1752 | return -EINVAL; | 1221 | return ERR_PTR(-EINVAL); |
1753 | } | 1222 | } |
1754 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | 1223 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; |
1755 | 1224 | ||
1756 | error = sock_create_kern(sin->sa_family, type, protocol, &sock); | 1225 | error = sock_create_kern(sin->sa_family, type, protocol, &sock); |
1757 | if (error < 0) | 1226 | if (error < 0) |
1758 | return error; | 1227 | return ERR_PTR(error); |
1759 | 1228 | ||
1760 | svc_reclassify_socket(sock); | 1229 | svc_reclassify_socket(sock); |
1761 | 1230 | ||
@@ -1765,203 +1234,55 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, | |||
1765 | if (error < 0) | 1234 | if (error < 0) |
1766 | goto bummer; | 1235 | goto bummer; |
1767 | 1236 | ||
1237 | newlen = len; | ||
1238 | error = kernel_getsockname(sock, newsin, &newlen); | ||
1239 | if (error < 0) | ||
1240 | goto bummer; | ||
1241 | |||
1768 | if (protocol == IPPROTO_TCP) { | 1242 | if (protocol == IPPROTO_TCP) { |
1769 | if ((error = kernel_listen(sock, 64)) < 0) | 1243 | if ((error = kernel_listen(sock, 64)) < 0) |
1770 | goto bummer; | 1244 | goto bummer; |
1771 | } | 1245 | } |
1772 | 1246 | ||
1773 | if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { | 1247 | if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { |
1774 | svc_sock_received(svsk); | 1248 | svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); |
1775 | return ntohs(inet_sk(svsk->sk_sk)->sport); | 1249 | return (struct svc_xprt *)svsk; |
1776 | } | 1250 | } |
1777 | 1251 | ||
1778 | bummer: | 1252 | bummer: |
1779 | dprintk("svc: svc_create_socket error = %d\n", -error); | 1253 | dprintk("svc: svc_create_socket error = %d\n", -error); |
1780 | sock_release(sock); | 1254 | sock_release(sock); |
1781 | return error; | 1255 | return ERR_PTR(error); |
1782 | } | 1256 | } |
1783 | 1257 | ||
1784 | /* | 1258 | /* |
1785 | * Remove a dead socket | 1259 | * Detach the svc_sock from the socket so that no |
1260 | * more callbacks occur. | ||
1786 | */ | 1261 | */ |
1787 | static void | 1262 | static void svc_sock_detach(struct svc_xprt *xprt) |
1788 | svc_delete_socket(struct svc_sock *svsk) | ||
1789 | { | 1263 | { |
1790 | struct svc_serv *serv; | 1264 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); |
1791 | struct sock *sk; | 1265 | struct sock *sk = svsk->sk_sk; |
1792 | |||
1793 | dprintk("svc: svc_delete_socket(%p)\n", svsk); | ||
1794 | 1266 | ||
1795 | serv = svsk->sk_server; | 1267 | dprintk("svc: svc_sock_detach(%p)\n", svsk); |
1796 | sk = svsk->sk_sk; | ||
1797 | 1268 | ||
1269 | /* put back the old socket callbacks */ | ||
1798 | sk->sk_state_change = svsk->sk_ostate; | 1270 | sk->sk_state_change = svsk->sk_ostate; |
1799 | sk->sk_data_ready = svsk->sk_odata; | 1271 | sk->sk_data_ready = svsk->sk_odata; |
1800 | sk->sk_write_space = svsk->sk_owspace; | 1272 | sk->sk_write_space = svsk->sk_owspace; |
1801 | |||
1802 | spin_lock_bh(&serv->sv_lock); | ||
1803 | |||
1804 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) | ||
1805 | list_del_init(&svsk->sk_list); | ||
1806 | /* | ||
1807 | * We used to delete the svc_sock from whichever list | ||
1808 | * it's sk_ready node was on, but we don't actually | ||
1809 | * need to. This is because the only time we're called | ||
1810 | * while still attached to a queue, the queue itself | ||
1811 | * is about to be destroyed (in svc_destroy). | ||
1812 | */ | ||
1813 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { | ||
1814 | BUG_ON(atomic_read(&svsk->sk_inuse)<2); | ||
1815 | atomic_dec(&svsk->sk_inuse); | ||
1816 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | ||
1817 | serv->sv_tmpcnt--; | ||
1818 | } | ||
1819 | |||
1820 | spin_unlock_bh(&serv->sv_lock); | ||
1821 | } | ||
1822 | |||
1823 | static void svc_close_socket(struct svc_sock *svsk) | ||
1824 | { | ||
1825 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1826 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) | ||
1827 | /* someone else will have to effect the close */ | ||
1828 | return; | ||
1829 | |||
1830 | atomic_inc(&svsk->sk_inuse); | ||
1831 | svc_delete_socket(svsk); | ||
1832 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
1833 | svc_sock_put(svsk); | ||
1834 | } | ||
1835 | |||
1836 | void svc_force_close_socket(struct svc_sock *svsk) | ||
1837 | { | ||
1838 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1839 | if (test_bit(SK_BUSY, &svsk->sk_flags)) { | ||
1840 | /* Waiting to be processed, but no threads left, | ||
1841 | * So just remove it from the waiting list | ||
1842 | */ | ||
1843 | list_del_init(&svsk->sk_ready); | ||
1844 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
1845 | } | ||
1846 | svc_close_socket(svsk); | ||
1847 | } | ||
1848 | |||
1849 | /** | ||
1850 | * svc_makesock - Make a socket for nfsd and lockd | ||
1851 | * @serv: RPC server structure | ||
1852 | * @protocol: transport protocol to use | ||
1853 | * @port: port to use | ||
1854 | * @flags: requested socket characteristics | ||
1855 | * | ||
1856 | */ | ||
1857 | int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port, | ||
1858 | int flags) | ||
1859 | { | ||
1860 | struct sockaddr_in sin = { | ||
1861 | .sin_family = AF_INET, | ||
1862 | .sin_addr.s_addr = INADDR_ANY, | ||
1863 | .sin_port = htons(port), | ||
1864 | }; | ||
1865 | |||
1866 | dprintk("svc: creating socket proto = %d\n", protocol); | ||
1867 | return svc_create_socket(serv, protocol, (struct sockaddr *) &sin, | ||
1868 | sizeof(sin), flags); | ||
1869 | } | 1273 | } |
1870 | 1274 | ||
1871 | /* | 1275 | /* |
1872 | * Handle defer and revisit of requests | 1276 | * Free the svc_sock's socket resources and the svc_sock itself. |
1873 | */ | 1277 | */ |
1874 | 1278 | static void svc_sock_free(struct svc_xprt *xprt) | |
1875 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | ||
1876 | { | 1279 | { |
1877 | struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); | 1280 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); |
1878 | struct svc_sock *svsk; | 1281 | dprintk("svc: svc_sock_free(%p)\n", svsk); |
1879 | 1282 | ||
1880 | if (too_many) { | 1283 | if (svsk->sk_sock->file) |
1881 | svc_sock_put(dr->svsk); | 1284 | sockfd_put(svsk->sk_sock); |
1882 | kfree(dr); | 1285 | else |
1883 | return; | 1286 | sock_release(svsk->sk_sock); |
1884 | } | 1287 | kfree(svsk); |
1885 | dprintk("revisit queued\n"); | ||
1886 | svsk = dr->svsk; | ||
1887 | dr->svsk = NULL; | ||
1888 | spin_lock(&svsk->sk_lock); | ||
1889 | list_add(&dr->handle.recent, &svsk->sk_deferred); | ||
1890 | spin_unlock(&svsk->sk_lock); | ||
1891 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1892 | svc_sock_enqueue(svsk); | ||
1893 | svc_sock_put(svsk); | ||
1894 | } | ||
1895 | |||
1896 | static struct cache_deferred_req * | ||
1897 | svc_defer(struct cache_req *req) | ||
1898 | { | ||
1899 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | ||
1900 | int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len); | ||
1901 | struct svc_deferred_req *dr; | ||
1902 | |||
1903 | if (rqstp->rq_arg.page_len) | ||
1904 | return NULL; /* if more than a page, give up FIXME */ | ||
1905 | if (rqstp->rq_deferred) { | ||
1906 | dr = rqstp->rq_deferred; | ||
1907 | rqstp->rq_deferred = NULL; | ||
1908 | } else { | ||
1909 | int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | ||
1910 | /* FIXME maybe discard if size too large */ | ||
1911 | dr = kmalloc(size, GFP_KERNEL); | ||
1912 | if (dr == NULL) | ||
1913 | return NULL; | ||
1914 | |||
1915 | dr->handle.owner = rqstp->rq_server; | ||
1916 | dr->prot = rqstp->rq_prot; | ||
1917 | memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); | ||
1918 | dr->addrlen = rqstp->rq_addrlen; | ||
1919 | dr->daddr = rqstp->rq_daddr; | ||
1920 | dr->argslen = rqstp->rq_arg.len >> 2; | ||
1921 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); | ||
1922 | } | ||
1923 | atomic_inc(&rqstp->rq_sock->sk_inuse); | ||
1924 | dr->svsk = rqstp->rq_sock; | ||
1925 | |||
1926 | dr->handle.revisit = svc_revisit; | ||
1927 | return &dr->handle; | ||
1928 | } | ||
1929 | |||
1930 | /* | ||
1931 | * recv data from a deferred request into an active one | ||
1932 | */ | ||
1933 | static int svc_deferred_recv(struct svc_rqst *rqstp) | ||
1934 | { | ||
1935 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
1936 | |||
1937 | rqstp->rq_arg.head[0].iov_base = dr->args; | ||
1938 | rqstp->rq_arg.head[0].iov_len = dr->argslen<<2; | ||
1939 | rqstp->rq_arg.page_len = 0; | ||
1940 | rqstp->rq_arg.len = dr->argslen<<2; | ||
1941 | rqstp->rq_prot = dr->prot; | ||
1942 | memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); | ||
1943 | rqstp->rq_addrlen = dr->addrlen; | ||
1944 | rqstp->rq_daddr = dr->daddr; | ||
1945 | rqstp->rq_respages = rqstp->rq_pages; | ||
1946 | return dr->argslen<<2; | ||
1947 | } | ||
1948 | |||
1949 | |||
1950 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) | ||
1951 | { | ||
1952 | struct svc_deferred_req *dr = NULL; | ||
1953 | |||
1954 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) | ||
1955 | return NULL; | ||
1956 | spin_lock(&svsk->sk_lock); | ||
1957 | clear_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1958 | if (!list_empty(&svsk->sk_deferred)) { | ||
1959 | dr = list_entry(svsk->sk_deferred.next, | ||
1960 | struct svc_deferred_req, | ||
1961 | handle.recent); | ||
1962 | list_del_init(&dr->handle.recent); | ||
1963 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1964 | } | ||
1965 | spin_unlock(&svsk->sk_lock); | ||
1966 | return dr; | ||
1967 | } | 1288 | } |
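
The hunks above finish moving the socket transport onto the generic svc_xprt interface: svc_setup_socket() drops its list/timer/deferral bookkeeping, svc_create_socket() hands back a struct svc_xprt instead of a port number, and teardown is split into svc_sock_detach() (restore the sk_* callbacks) and svc_sock_free() (release the socket and the svc_sock). As a rough orientation aid only, here is a minimal sketch of how such callbacks are typically wired into a transport class; the xpo_*/xcl_* field names and the svc_tcp_create_sketch() wrapper are assumptions for illustration and are not taken from this hunk.

	#include <linux/sunrpc/svc_xprt.h>
	#include <linux/sunrpc/svcsock.h>

	/* Hypothetical creator wrapping the reworked svc_create_socket() above. */
	static struct svc_xprt *svc_tcp_create_sketch(struct svc_serv *serv,
						      struct sockaddr *sa,
						      int salen, int flags)
	{
		return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags);
	}

	static struct svc_xprt_ops svc_tcp_ops_sketch = {
		.xpo_create = svc_tcp_create_sketch,	/* assumed field names */
		.xpo_detach = svc_sock_detach,		/* unhook sk_* callbacks */
		.xpo_free   = svc_sock_free,		/* drop socket + svc_sock */
	};

	static struct svc_xprt_class svc_tcp_class_sketch = {
		.xcl_name = "tcp",
		.xcl_ops  = &svc_tcp_ops_sketch,
	};
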
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index bada7de0c2fc..0f8c439b848a 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/sunrpc/types.h> | 18 | #include <linux/sunrpc/types.h> |
19 | #include <linux/sunrpc/sched.h> | 19 | #include <linux/sunrpc/sched.h> |
20 | #include <linux/sunrpc/stats.h> | 20 | #include <linux/sunrpc/stats.h> |
21 | #include <linux/sunrpc/svc_xprt.h> | ||
21 | 22 | ||
22 | /* | 23 | /* |
23 | * Declare the debug flags here | 24 | * Declare the debug flags here |
@@ -55,6 +56,30 @@ rpc_unregister_sysctl(void) | |||
55 | } | 56 | } |
56 | } | 57 | } |
57 | 58 | ||
59 | static int proc_do_xprt(ctl_table *table, int write, struct file *file, | ||
60 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
61 | { | ||
62 | char tmpbuf[256]; | ||
63 | int len; | ||
64 | if ((*ppos && !write) || !*lenp) { | ||
65 | *lenp = 0; | ||
66 | return 0; | ||
67 | } | ||
68 | if (write) | ||
69 | return -EINVAL; | ||
70 | else { | ||
71 | len = svc_print_xprts(tmpbuf, sizeof(tmpbuf)); | ||
72 | if (!access_ok(VERIFY_WRITE, buffer, len)) | ||
73 | return -EFAULT; | ||
74 | |||
75 | if (__copy_to_user(buffer, tmpbuf, len)) | ||
76 | return -EFAULT; | ||
77 | } | ||
78 | *lenp -= len; | ||
79 | *ppos += len; | ||
80 | return 0; | ||
81 | } | ||
82 | |||
58 | static int | 83 | static int |
59 | proc_dodebug(ctl_table *table, int write, struct file *file, | 84 | proc_dodebug(ctl_table *table, int write, struct file *file, |
60 | void __user *buffer, size_t *lenp, loff_t *ppos) | 85 | void __user *buffer, size_t *lenp, loff_t *ppos) |
@@ -147,6 +172,12 @@ static ctl_table debug_table[] = { | |||
147 | .mode = 0644, | 172 | .mode = 0644, |
148 | .proc_handler = &proc_dodebug | 173 | .proc_handler = &proc_dodebug |
149 | }, | 174 | }, |
175 | { | ||
176 | .procname = "transports", | ||
177 | .maxlen = 256, | ||
178 | .mode = 0444, | ||
179 | .proc_handler = &proc_do_xprt, | ||
180 | }, | ||
150 | { .ctl_name = 0 } | 181 | { .ctl_name = 0 } |
151 | }; | 182 | }; |
152 | 183 | ||
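
The sysctl hunk adds a read-only "transports" entry whose handler, proc_do_xprt(), copies out whatever svc_print_xprts() formats into a 256-byte buffer. A hedged userspace sketch, assuming the table is exposed under /proc/sys/sunrpc alongside the existing debug entries:

	#include <stdio.h>

	int main(void)
	{
		char buf[256];
		FILE *f = fopen("/proc/sys/sunrpc/transports", "r");

		if (!f) {
			perror("open transports");
			return 1;
		}
		/* Dump whatever svc_print_xprts() produced, one line per class. */
		while (fgets(buf, sizeof(buf), f))
			fputs(buf, stdout);
		fclose(f);
		return 0;
	}
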
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 54264062ea69..995c3fdc16c2 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
@@ -96,11 +96,13 @@ xdr_encode_string(__be32 *p, const char *string) | |||
96 | EXPORT_SYMBOL(xdr_encode_string); | 96 | EXPORT_SYMBOL(xdr_encode_string); |
97 | 97 | ||
98 | __be32 * | 98 | __be32 * |
99 | xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) | 99 | xdr_decode_string_inplace(__be32 *p, char **sp, |
100 | unsigned int *lenp, unsigned int maxlen) | ||
100 | { | 101 | { |
101 | unsigned int len; | 102 | u32 len; |
102 | 103 | ||
103 | if ((len = ntohl(*p++)) > maxlen) | 104 | len = ntohl(*p++); |
105 | if (len > maxlen) | ||
104 | return NULL; | 106 | return NULL; |
105 | *lenp = len; | 107 | *lenp = len; |
106 | *sp = (char *) p; | 108 | *sp = (char *) p; |
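
The xdr.c change switches the in-place string decoder to unsigned lengths, so an attacker-supplied 32-bit length can no longer wrap negative and slip past the maxlen check. A hedged caller sketch (names and the 255-byte cap are illustrative only):

	#include <linux/sunrpc/xdr.h>

	/* Illustrative only: decode one XDR counted string without copying it. */
	static __be32 *demo_decode_name(__be32 *p, char **name, unsigned int *len)
	{
		/* NULL means the on-the-wire length exceeded the cap. */
		p = xdr_decode_string_inplace(p, name, len, 255);
		if (!p)
			return NULL;
		/* *name now points into the XDR buffer; it is not NUL-terminated. */
		return p;
	}
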
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 264f0feeb513..5a8f268bdd30 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
@@ -1,3 +1,8 @@ | |||
1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o |
2 | 2 | ||
3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o |
4 | |||
5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o | ||
6 | |||
7 | svcrdma-y := svc_rdma.o svc_rdma_transport.o \ | ||
8 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c new file mode 100644 index 000000000000..88c0ca20bb1e --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma.c | |||
@@ -0,0 +1,266 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | #include <linux/module.h> | ||
42 | #include <linux/init.h> | ||
43 | #include <linux/fs.h> | ||
44 | #include <linux/sysctl.h> | ||
45 | #include <linux/sunrpc/clnt.h> | ||
46 | #include <linux/sunrpc/sched.h> | ||
47 | #include <linux/sunrpc/svc_rdma.h> | ||
48 | |||
49 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
50 | |||
51 | /* RPC/RDMA parameters */ | ||
52 | unsigned int svcrdma_ord = RPCRDMA_ORD; | ||
53 | static unsigned int min_ord = 1; | ||
54 | static unsigned int max_ord = 4096; | ||
55 | unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; | ||
56 | static unsigned int min_max_requests = 4; | ||
57 | static unsigned int max_max_requests = 16384; | ||
58 | unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE; | ||
59 | static unsigned int min_max_inline = 4096; | ||
60 | static unsigned int max_max_inline = 65536; | ||
61 | |||
62 | atomic_t rdma_stat_recv; | ||
63 | atomic_t rdma_stat_read; | ||
64 | atomic_t rdma_stat_write; | ||
65 | atomic_t rdma_stat_sq_starve; | ||
66 | atomic_t rdma_stat_rq_starve; | ||
67 | atomic_t rdma_stat_rq_poll; | ||
68 | atomic_t rdma_stat_rq_prod; | ||
69 | atomic_t rdma_stat_sq_poll; | ||
70 | atomic_t rdma_stat_sq_prod; | ||
71 | |||
72 | /* | ||
73 | * This function implements reading and resetting an atomic_t stat | ||
74 | * variable through read/write to a proc file. Any write to the file | ||
75 | * resets the associated statistic to zero. Any read returns its | ||
76 | * current value. | ||
77 | */ | ||
78 | static int read_reset_stat(ctl_table *table, int write, | ||
79 | struct file *filp, void __user *buffer, size_t *lenp, | ||
80 | loff_t *ppos) | ||
81 | { | ||
82 | atomic_t *stat = (atomic_t *)table->data; | ||
83 | |||
84 | if (!stat) | ||
85 | return -EINVAL; | ||
86 | |||
87 | if (write) | ||
88 | atomic_set(stat, 0); | ||
89 | else { | ||
90 | char str_buf[32]; | ||
91 | char *data; | ||
92 | int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat)); | ||
93 | if (len >= 32) | ||
94 | return -EFAULT; | ||
95 | len = strlen(str_buf); | ||
96 | if (*ppos > len) { | ||
97 | *lenp = 0; | ||
98 | return 0; | ||
99 | } | ||
100 | data = &str_buf[*ppos]; | ||
101 | len -= *ppos; | ||
102 | if (len > *lenp) | ||
103 | len = *lenp; | ||
104 | if (len && copy_to_user(buffer, data, len)) | ||
105 | return -EFAULT; | ||
106 | *lenp = len; | ||
107 | *ppos += len; | ||
108 | } | ||
109 | return 0; | ||
110 | } | ||
111 | |||
112 | static struct ctl_table_header *svcrdma_table_header; | ||
113 | static ctl_table svcrdma_parm_table[] = { | ||
114 | { | ||
115 | .procname = "max_requests", | ||
116 | .data = &svcrdma_max_requests, | ||
117 | .maxlen = sizeof(unsigned int), | ||
118 | .mode = 0644, | ||
119 | .proc_handler = &proc_dointvec_minmax, | ||
120 | .strategy = &sysctl_intvec, | ||
121 | .extra1 = &min_max_requests, | ||
122 | .extra2 = &max_max_requests | ||
123 | }, | ||
124 | { | ||
125 | .procname = "max_req_size", | ||
126 | .data = &svcrdma_max_req_size, | ||
127 | .maxlen = sizeof(unsigned int), | ||
128 | .mode = 0644, | ||
129 | .proc_handler = &proc_dointvec_minmax, | ||
130 | .strategy = &sysctl_intvec, | ||
131 | .extra1 = &min_max_inline, | ||
132 | .extra2 = &max_max_inline | ||
133 | }, | ||
134 | { | ||
135 | .procname = "max_outbound_read_requests", | ||
136 | .data = &svcrdma_ord, | ||
137 | .maxlen = sizeof(unsigned int), | ||
138 | .mode = 0644, | ||
139 | .proc_handler = &proc_dointvec_minmax, | ||
140 | .strategy = &sysctl_intvec, | ||
141 | .extra1 = &min_ord, | ||
142 | .extra2 = &max_ord, | ||
143 | }, | ||
144 | |||
145 | { | ||
146 | .procname = "rdma_stat_read", | ||
147 | .data = &rdma_stat_read, | ||
148 | .maxlen = sizeof(atomic_t), | ||
149 | .mode = 0644, | ||
150 | .proc_handler = &read_reset_stat, | ||
151 | }, | ||
152 | { | ||
153 | .procname = "rdma_stat_recv", | ||
154 | .data = &rdma_stat_recv, | ||
155 | .maxlen = sizeof(atomic_t), | ||
156 | .mode = 0644, | ||
157 | .proc_handler = &read_reset_stat, | ||
158 | }, | ||
159 | { | ||
160 | .procname = "rdma_stat_write", | ||
161 | .data = &rdma_stat_write, | ||
162 | .maxlen = sizeof(atomic_t), | ||
163 | .mode = 0644, | ||
164 | .proc_handler = &read_reset_stat, | ||
165 | }, | ||
166 | { | ||
167 | .procname = "rdma_stat_sq_starve", | ||
168 | .data = &rdma_stat_sq_starve, | ||
169 | .maxlen = sizeof(atomic_t), | ||
170 | .mode = 0644, | ||
171 | .proc_handler = &read_reset_stat, | ||
172 | }, | ||
173 | { | ||
174 | .procname = "rdma_stat_rq_starve", | ||
175 | .data = &rdma_stat_rq_starve, | ||
176 | .maxlen = sizeof(atomic_t), | ||
177 | .mode = 0644, | ||
178 | .proc_handler = &read_reset_stat, | ||
179 | }, | ||
180 | { | ||
181 | .procname = "rdma_stat_rq_poll", | ||
182 | .data = &rdma_stat_rq_poll, | ||
183 | .maxlen = sizeof(atomic_t), | ||
184 | .mode = 0644, | ||
185 | .proc_handler = &read_reset_stat, | ||
186 | }, | ||
187 | { | ||
188 | .procname = "rdma_stat_rq_prod", | ||
189 | .data = &rdma_stat_rq_prod, | ||
190 | .maxlen = sizeof(atomic_t), | ||
191 | .mode = 0644, | ||
192 | .proc_handler = &read_reset_stat, | ||
193 | }, | ||
194 | { | ||
195 | .procname = "rdma_stat_sq_poll", | ||
196 | .data = &rdma_stat_sq_poll, | ||
197 | .maxlen = sizeof(atomic_t), | ||
198 | .mode = 0644, | ||
199 | .proc_handler = &read_reset_stat, | ||
200 | }, | ||
201 | { | ||
202 | .procname = "rdma_stat_sq_prod", | ||
203 | .data = &rdma_stat_sq_prod, | ||
204 | .maxlen = sizeof(atomic_t), | ||
205 | .mode = 0644, | ||
206 | .proc_handler = &read_reset_stat, | ||
207 | }, | ||
208 | { | ||
209 | .ctl_name = 0, | ||
210 | }, | ||
211 | }; | ||
212 | |||
213 | static ctl_table svcrdma_table[] = { | ||
214 | { | ||
215 | .procname = "svc_rdma", | ||
216 | .mode = 0555, | ||
217 | .child = svcrdma_parm_table | ||
218 | }, | ||
219 | { | ||
220 | .ctl_name = 0, | ||
221 | }, | ||
222 | }; | ||
223 | |||
224 | static ctl_table svcrdma_root_table[] = { | ||
225 | { | ||
226 | .ctl_name = CTL_SUNRPC, | ||
227 | .procname = "sunrpc", | ||
228 | .mode = 0555, | ||
229 | .child = svcrdma_table | ||
230 | }, | ||
231 | { | ||
232 | .ctl_name = 0, | ||
233 | }, | ||
234 | }; | ||
235 | |||
236 | void svc_rdma_cleanup(void) | ||
237 | { | ||
238 | dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); | ||
239 | if (svcrdma_table_header) { | ||
240 | unregister_sysctl_table(svcrdma_table_header); | ||
241 | svcrdma_table_header = NULL; | ||
242 | } | ||
243 | svc_unreg_xprt_class(&svc_rdma_class); | ||
244 | } | ||
245 | |||
246 | int svc_rdma_init(void) | ||
247 | { | ||
248 | dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); | ||
249 | dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); | ||
250 | dprintk("\tmax_requests : %d\n", svcrdma_max_requests); | ||
251 | dprintk("\tsq_depth : %d\n", | ||
252 | svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); | ||
253 | dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); | ||
254 | if (!svcrdma_table_header) | ||
255 | svcrdma_table_header = | ||
256 | register_sysctl_table(svcrdma_root_table); | ||
257 | |||
258 | /* Register RDMA with the SVC transport switch */ | ||
259 | svc_reg_xprt_class(&svc_rdma_class); | ||
260 | return 0; | ||
261 | } | ||
262 | MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); | ||
263 | MODULE_DESCRIPTION("SVC RDMA Transport"); | ||
264 | MODULE_LICENSE("Dual BSD/GPL"); | ||
265 | module_init(svc_rdma_init); | ||
266 | module_exit(svc_rdma_cleanup); | ||
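
read_reset_stat() above gives every rdma_stat_* entry "read returns the counter, any write zeroes it" semantics. A hedged userspace sketch, assuming the usual /proc/sys mapping of the sunrpc/svc_rdma tables registered above:

	#include <stdio.h>

	int main(void)
	{
		const char *path = "/proc/sys/sunrpc/svc_rdma/rdma_stat_read";
		int reads = 0;
		FILE *f;

		f = fopen(path, "r");
		if (f) {
			if (fscanf(f, "%d", &reads) == 1)
				printf("RDMA_READs since last reset: %d\n", reads);
			fclose(f);
		}

		f = fopen(path, "w");	/* any write resets the counter to zero */
		if (f) {
			fputs("0\n", f);
			fclose(f);
		}
		return 0;
	}
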
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c new file mode 100644 index 000000000000..9530ef2d40dc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c | |||
@@ -0,0 +1,412 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/xdr.h> | ||
43 | #include <linux/sunrpc/debug.h> | ||
44 | #include <asm/unaligned.h> | ||
45 | #include <linux/sunrpc/rpc_rdma.h> | ||
46 | #include <linux/sunrpc/svc_rdma.h> | ||
47 | |||
48 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
49 | |||
50 | /* | ||
51 | * Decodes a read chunk list. The expected format is as follows: | ||
52 | * descrim : xdr_one | ||
53 | * position : u32 offset into XDR stream | ||
54 | * handle : u32 RKEY | ||
55 | * . . . | ||
56 | * end-of-list: xdr_zero | ||
57 | */ | ||
58 | static u32 *decode_read_list(u32 *va, u32 *vaend) | ||
59 | { | ||
60 | struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; | ||
61 | |||
62 | while (ch->rc_discrim != xdr_zero) { | ||
63 | u64 ch_offset; | ||
64 | |||
65 | if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > | ||
66 | (unsigned long)vaend) { | ||
67 | dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
71 | ch->rc_discrim = ntohl(ch->rc_discrim); | ||
72 | ch->rc_position = ntohl(ch->rc_position); | ||
73 | ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle); | ||
74 | ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length); | ||
75 | va = (u32 *)&ch->rc_target.rs_offset; | ||
76 | xdr_decode_hyper(va, &ch_offset); | ||
77 | put_unaligned(ch_offset, (u64 *)va); | ||
78 | ch++; | ||
79 | } | ||
80 | return (u32 *)&ch->rc_position; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Determine number of chunks and total bytes in chunk list. The chunk | ||
85 | * list has already been verified to fit within the RPCRDMA header. | ||
86 | */ | ||
87 | void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, | ||
88 | int *ch_count, int *byte_count) | ||
89 | { | ||
90 | /* compute the number of bytes represented by read chunks */ | ||
91 | *byte_count = 0; | ||
92 | *ch_count = 0; | ||
93 | for (; ch->rc_discrim != 0; ch++) { | ||
94 | *byte_count = *byte_count + ch->rc_target.rs_length; | ||
95 | *ch_count = *ch_count + 1; | ||
96 | } | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Decodes a write chunk list. The expected format is as follows: | ||
101 | * descrim : xdr_one | ||
102 | * nchunks : <count> | ||
103 | * handle : u32 RKEY ---+ | ||
104 | * length : u32 <len of segment> | | ||
105 | * offset : remote va + <count> | ||
106 | * . . . | | ||
107 | * ---+ | ||
108 | */ | ||
109 | static u32 *decode_write_list(u32 *va, u32 *vaend) | ||
110 | { | ||
111 | int ch_no; | ||
112 | struct rpcrdma_write_array *ary = | ||
113 | (struct rpcrdma_write_array *)va; | ||
114 | |||
115 | /* Check for not write-array */ | ||
116 | if (ary->wc_discrim == xdr_zero) | ||
117 | return (u32 *)&ary->wc_nchunks; | ||
118 | |||
119 | if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > | ||
120 | (unsigned long)vaend) { | ||
121 | dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); | ||
122 | return NULL; | ||
123 | } | ||
124 | ary->wc_discrim = ntohl(ary->wc_discrim); | ||
125 | ary->wc_nchunks = ntohl(ary->wc_nchunks); | ||
126 | if (((unsigned long)&ary->wc_array[0] + | ||
127 | (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > | ||
128 | (unsigned long)vaend) { | ||
129 | dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", | ||
130 | ary, ary->wc_nchunks, vaend); | ||
131 | return NULL; | ||
132 | } | ||
133 | for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { | ||
134 | u64 ch_offset; | ||
135 | |||
136 | ary->wc_array[ch_no].wc_target.rs_handle = | ||
137 | ntohl(ary->wc_array[ch_no].wc_target.rs_handle); | ||
138 | ary->wc_array[ch_no].wc_target.rs_length = | ||
139 | ntohl(ary->wc_array[ch_no].wc_target.rs_length); | ||
140 | va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; | ||
141 | xdr_decode_hyper(va, &ch_offset); | ||
142 | put_unaligned(ch_offset, (u64 *)va); | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * rs_length is the 2nd 4B field in wc_target and taking its | ||
147 | * address skips the list terminator | ||
148 | */ | ||
149 | return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length; | ||
150 | } | ||
151 | |||
152 | static u32 *decode_reply_array(u32 *va, u32 *vaend) | ||
153 | { | ||
154 | int ch_no; | ||
155 | struct rpcrdma_write_array *ary = | ||
156 | (struct rpcrdma_write_array *)va; | ||
157 | |||
158 | /* Check for no reply-array */ | ||
159 | if (ary->wc_discrim == xdr_zero) | ||
160 | return (u32 *)&ary->wc_nchunks; | ||
161 | |||
162 | if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > | ||
163 | (unsigned long)vaend) { | ||
164 | dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); | ||
165 | return NULL; | ||
166 | } | ||
167 | ary->wc_discrim = ntohl(ary->wc_discrim); | ||
168 | ary->wc_nchunks = ntohl(ary->wc_nchunks); | ||
169 | if (((unsigned long)&ary->wc_array[0] + | ||
170 | (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > | ||
171 | (unsigned long)vaend) { | ||
172 | dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", | ||
173 | ary, ary->wc_nchunks, vaend); | ||
174 | return NULL; | ||
175 | } | ||
176 | for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { | ||
177 | u64 ch_offset; | ||
178 | |||
179 | ary->wc_array[ch_no].wc_target.rs_handle = | ||
180 | ntohl(ary->wc_array[ch_no].wc_target.rs_handle); | ||
181 | ary->wc_array[ch_no].wc_target.rs_length = | ||
182 | ntohl(ary->wc_array[ch_no].wc_target.rs_length); | ||
183 | va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; | ||
184 | xdr_decode_hyper(va, &ch_offset); | ||
185 | put_unaligned(ch_offset, (u64 *)va); | ||
186 | } | ||
187 | |||
188 | return (u32 *)&ary->wc_array[ch_no]; | ||
189 | } | ||
190 | |||
191 | int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, | ||
192 | struct svc_rqst *rqstp) | ||
193 | { | ||
194 | struct rpcrdma_msg *rmsgp = NULL; | ||
195 | u32 *va; | ||
196 | u32 *vaend; | ||
197 | u32 hdr_len; | ||
198 | |||
199 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | ||
200 | |||
201 | /* Verify that there are enough bytes for header + something */ | ||
202 | if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { | ||
203 | dprintk("svcrdma: header too short = %d\n", | ||
204 | rqstp->rq_arg.len); | ||
205 | return -EINVAL; | ||
206 | } | ||
207 | |||
208 | /* Decode the header */ | ||
209 | rmsgp->rm_xid = ntohl(rmsgp->rm_xid); | ||
210 | rmsgp->rm_vers = ntohl(rmsgp->rm_vers); | ||
211 | rmsgp->rm_credit = ntohl(rmsgp->rm_credit); | ||
212 | rmsgp->rm_type = ntohl(rmsgp->rm_type); | ||
213 | |||
214 | if (rmsgp->rm_vers != RPCRDMA_VERSION) | ||
215 | return -ENOSYS; | ||
216 | |||
217 | /* Pull in the extra for the padded case and bump our pointer */ | ||
218 | if (rmsgp->rm_type == RDMA_MSGP) { | ||
219 | int hdrlen; | ||
220 | rmsgp->rm_body.rm_padded.rm_align = | ||
221 | ntohl(rmsgp->rm_body.rm_padded.rm_align); | ||
222 | rmsgp->rm_body.rm_padded.rm_thresh = | ||
223 | ntohl(rmsgp->rm_body.rm_padded.rm_thresh); | ||
224 | |||
225 | va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; | ||
226 | rqstp->rq_arg.head[0].iov_base = va; | ||
227 | hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); | ||
228 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
229 | if (hdrlen > rqstp->rq_arg.len) | ||
230 | return -EINVAL; | ||
231 | return hdrlen; | ||
232 | } | ||
233 | |||
234 | /* The chunk list may contain either a read chunk list or a write | ||
235 | * chunk list and a reply chunk list. | ||
236 | */ | ||
237 | va = &rmsgp->rm_body.rm_chunks[0]; | ||
238 | vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); | ||
239 | va = decode_read_list(va, vaend); | ||
240 | if (!va) | ||
241 | return -EINVAL; | ||
242 | va = decode_write_list(va, vaend); | ||
243 | if (!va) | ||
244 | return -EINVAL; | ||
245 | va = decode_reply_array(va, vaend); | ||
246 | if (!va) | ||
247 | return -EINVAL; | ||
248 | |||
249 | rqstp->rq_arg.head[0].iov_base = va; | ||
250 | hdr_len = (unsigned long)va - (unsigned long)rmsgp; | ||
251 | rqstp->rq_arg.head[0].iov_len -= hdr_len; | ||
252 | |||
253 | *rdma_req = rmsgp; | ||
254 | return hdr_len; | ||
255 | } | ||
256 | |||
257 | int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp) | ||
258 | { | ||
259 | struct rpcrdma_msg *rmsgp = NULL; | ||
260 | struct rpcrdma_read_chunk *ch; | ||
261 | struct rpcrdma_write_array *ary; | ||
262 | u32 *va; | ||
263 | u32 hdrlen; | ||
264 | |||
265 | dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n", | ||
266 | rqstp); | ||
267 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | ||
268 | |||
269 | /* Pull in the extra for the padded case and bump our pointer */ | ||
270 | if (rmsgp->rm_type == RDMA_MSGP) { | ||
271 | va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; | ||
272 | rqstp->rq_arg.head[0].iov_base = va; | ||
273 | hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); | ||
274 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
275 | return hdrlen; | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * Skip all chunks to find RPC msg. These were previously processed | ||
280 | */ | ||
281 | va = &rmsgp->rm_body.rm_chunks[0]; | ||
282 | |||
283 | /* Skip read-list */ | ||
284 | for (ch = (struct rpcrdma_read_chunk *)va; | ||
285 | ch->rc_discrim != xdr_zero; ch++); | ||
286 | va = (u32 *)&ch->rc_position; | ||
287 | |||
288 | /* Skip write-list */ | ||
289 | ary = (struct rpcrdma_write_array *)va; | ||
290 | if (ary->wc_discrim == xdr_zero) | ||
291 | va = (u32 *)&ary->wc_nchunks; | ||
292 | else | ||
293 | /* | ||
294 | * rs_length is the 2nd 4B field in wc_target and taking its | ||
295 | * address skips the list terminator | ||
296 | */ | ||
297 | va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length; | ||
298 | |||
299 | /* Skip reply-array */ | ||
300 | ary = (struct rpcrdma_write_array *)va; | ||
301 | if (ary->wc_discrim == xdr_zero) | ||
302 | va = (u32 *)&ary->wc_nchunks; | ||
303 | else | ||
304 | va = (u32 *)&ary->wc_array[ary->wc_nchunks]; | ||
305 | |||
306 | rqstp->rq_arg.head[0].iov_base = va; | ||
307 | hdrlen = (unsigned long)va - (unsigned long)rmsgp; | ||
308 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
309 | |||
310 | return hdrlen; | ||
311 | } | ||
312 | |||
313 | int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, | ||
314 | struct rpcrdma_msg *rmsgp, | ||
315 | enum rpcrdma_errcode err, u32 *va) | ||
316 | { | ||
317 | u32 *startp = va; | ||
318 | |||
319 | *va++ = htonl(rmsgp->rm_xid); | ||
320 | *va++ = htonl(rmsgp->rm_vers); | ||
321 | *va++ = htonl(xprt->sc_max_requests); | ||
322 | *va++ = htonl(RDMA_ERROR); | ||
323 | *va++ = htonl(err); | ||
324 | if (err == ERR_VERS) { | ||
325 | *va++ = htonl(RPCRDMA_VERSION); | ||
326 | *va++ = htonl(RPCRDMA_VERSION); | ||
327 | } | ||
328 | |||
329 | return (int)((unsigned long)va - (unsigned long)startp); | ||
330 | } | ||
331 | |||
332 | int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) | ||
333 | { | ||
334 | struct rpcrdma_write_array *wr_ary; | ||
335 | |||
336 | /* There is no read-list in a reply */ | ||
337 | |||
338 | /* skip write list */ | ||
339 | wr_ary = (struct rpcrdma_write_array *) | ||
340 | &rmsgp->rm_body.rm_chunks[1]; | ||
341 | if (wr_ary->wc_discrim) | ||
342 | wr_ary = (struct rpcrdma_write_array *) | ||
343 | &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. | ||
344 | wc_target.rs_length; | ||
345 | else | ||
346 | wr_ary = (struct rpcrdma_write_array *) | ||
347 | &wr_ary->wc_nchunks; | ||
348 | |||
349 | /* skip reply array */ | ||
350 | if (wr_ary->wc_discrim) | ||
351 | wr_ary = (struct rpcrdma_write_array *) | ||
352 | &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; | ||
353 | else | ||
354 | wr_ary = (struct rpcrdma_write_array *) | ||
355 | &wr_ary->wc_nchunks; | ||
356 | |||
357 | return (unsigned long) wr_ary - (unsigned long) rmsgp; | ||
358 | } | ||
359 | |||
360 | void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) | ||
361 | { | ||
362 | struct rpcrdma_write_array *ary; | ||
363 | |||
364 | /* no read-list */ | ||
365 | rmsgp->rm_body.rm_chunks[0] = xdr_zero; | ||
366 | |||
367 | /* write-array discrim */ | ||
368 | ary = (struct rpcrdma_write_array *) | ||
369 | &rmsgp->rm_body.rm_chunks[1]; | ||
370 | ary->wc_discrim = xdr_one; | ||
371 | ary->wc_nchunks = htonl(chunks); | ||
372 | |||
373 | /* write-list terminator */ | ||
374 | ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; | ||
375 | |||
376 | /* reply-array discriminator */ | ||
377 | ary->wc_array[chunks].wc_target.rs_length = xdr_zero; | ||
378 | } | ||
379 | |||
380 | void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, | ||
381 | int chunks) | ||
382 | { | ||
383 | ary->wc_discrim = xdr_one; | ||
384 | ary->wc_nchunks = htonl(chunks); | ||
385 | } | ||
386 | |||
387 | void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, | ||
388 | int chunk_no, | ||
389 | u32 rs_handle, u64 rs_offset, | ||
390 | u32 write_len) | ||
391 | { | ||
392 | struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; | ||
393 | seg->rs_handle = htonl(rs_handle); | ||
394 | seg->rs_length = htonl(write_len); | ||
395 | xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset); | ||
396 | } | ||
397 | |||
398 | void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, | ||
399 | struct rpcrdma_msg *rdma_argp, | ||
400 | struct rpcrdma_msg *rdma_resp, | ||
401 | enum rpcrdma_proc rdma_type) | ||
402 | { | ||
403 | rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); | ||
404 | rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); | ||
405 | rdma_resp->rm_credit = htonl(xprt->sc_max_requests); | ||
406 | rdma_resp->rm_type = htonl(rdma_type); | ||
407 | |||
408 | /* Encode <nul> chunk lists */ | ||
409 | rdma_resp->rm_body.rm_chunks[0] = xdr_zero; | ||
410 | rdma_resp->rm_body.rm_chunks[1] = xdr_zero; | ||
411 | rdma_resp->rm_body.rm_chunks[2] = xdr_zero; | ||
412 | } | ||
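
svc_rdma_marshal.c byte-swaps the RPC/RDMA header in place, walks the read, write and reply chunk lists, and returns the header length so the caller can advance rq_arg past it. A hedged sketch of how a receive path could drive these helpers; svc_rdma_get_read_chunk() is used later in svc_rdma_recvfrom.c, while the wrapper itself is illustrative glue, not part of the patch:

	#include <linux/sunrpc/rpc_rdma.h>
	#include <linux/sunrpc/svc_rdma.h>

	/* Illustrative only: decode the header, then size the read list. */
	static int demo_parse_rdma_header(struct svc_rqst *rqstp)
	{
		struct rpcrdma_msg *rmsgp;
		struct rpcrdma_read_chunk *ch;
		int hdr_len, ch_count = 0, byte_count = 0;

		/* Sanity-check and byte-swap the header, then skip past it. */
		hdr_len = svc_rdma_xdr_decode_req(&rmsgp, rqstp);
		if (hdr_len < 0)
			return hdr_len;		/* -EINVAL or -ENOSYS */

		/* byte_count bytes must now be pulled over with RDMA_READs. */
		ch = svc_rdma_get_read_chunk(rmsgp);
		if (ch)
			svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);

		return byte_count;
	}
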
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c new file mode 100644 index 000000000000..ab54a736486e --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
@@ -0,0 +1,586 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/debug.h> | ||
43 | #include <linux/sunrpc/rpc_rdma.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | #include <asm/unaligned.h> | ||
46 | #include <rdma/ib_verbs.h> | ||
47 | #include <rdma/rdma_cm.h> | ||
48 | #include <linux/sunrpc/svc_rdma.h> | ||
49 | |||
50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
51 | |||
52 | /* | ||
53 | * Replace the pages in the rq_argpages array with the pages from the SGE in | ||
54 | * the RDMA_RECV completion. The SGL should contain full pages up until the | ||
55 | * last one. | ||
56 | */ | ||
57 | static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | ||
58 | struct svc_rdma_op_ctxt *ctxt, | ||
59 | u32 byte_count) | ||
60 | { | ||
61 | struct page *page; | ||
62 | u32 bc; | ||
63 | int sge_no; | ||
64 | |||
65 | /* Swap the page in the SGE with the page in argpages */ | ||
66 | page = ctxt->pages[0]; | ||
67 | put_page(rqstp->rq_pages[0]); | ||
68 | rqstp->rq_pages[0] = page; | ||
69 | |||
70 | /* Set up the XDR head */ | ||
71 | rqstp->rq_arg.head[0].iov_base = page_address(page); | ||
72 | rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length); | ||
73 | rqstp->rq_arg.len = byte_count; | ||
74 | rqstp->rq_arg.buflen = byte_count; | ||
75 | |||
76 | /* Compute bytes past head in the SGL */ | ||
77 | bc = byte_count - rqstp->rq_arg.head[0].iov_len; | ||
78 | |||
79 | /* If data remains, store it in the pagelist */ | ||
80 | rqstp->rq_arg.page_len = bc; | ||
81 | rqstp->rq_arg.page_base = 0; | ||
82 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | ||
83 | sge_no = 1; | ||
84 | while (bc && sge_no < ctxt->count) { | ||
85 | page = ctxt->pages[sge_no]; | ||
86 | put_page(rqstp->rq_pages[sge_no]); | ||
87 | rqstp->rq_pages[sge_no] = page; | ||
88 | bc -= min(bc, ctxt->sge[sge_no].length); | ||
89 | rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; | ||
90 | sge_no++; | ||
91 | } | ||
92 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | ||
93 | |||
94 | /* We should never run out of SGE because the limit is defined to | ||
95 | * support the max allowed RPC data length | ||
96 | */ | ||
97 | BUG_ON(bc && (sge_no == ctxt->count)); | ||
98 | BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len) | ||
99 | != byte_count); | ||
100 | BUG_ON(rqstp->rq_arg.len != byte_count); | ||
101 | |||
102 | /* If not all pages were used from the SGL, free the remaining ones */ | ||
103 | bc = sge_no; | ||
104 | while (sge_no < ctxt->count) { | ||
105 | page = ctxt->pages[sge_no++]; | ||
106 | put_page(page); | ||
107 | } | ||
108 | ctxt->count = bc; | ||
109 | |||
110 | /* Set up tail */ | ||
111 | rqstp->rq_arg.tail[0].iov_base = NULL; | ||
112 | rqstp->rq_arg.tail[0].iov_len = 0; | ||
113 | } | ||
114 | |||
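
A brief worked example of rdma_build_arg_xdr() above (assuming 4096-byte pages): if the RECV completion carried byte_count = 10000 across three SGEs of 4096, 4096 and 1808 bytes, head[0] takes the first 4096 bytes, page_len becomes 5904, ctxt->pages[1..2] are swapped into rq_pages[1..2], rq_respages is pointed at rq_pages[3], and the sanity checks confirm that the head length plus page_len equals byte_count.
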
115 | struct chunk_sge { | ||
116 | int start; /* sge no for this chunk */ | ||
117 | int count; /* sge count for this chunk */ | ||
118 | }; | ||
119 | |||
120 | /* Encode a read-chunk-list as an array of IB SGE | ||
121 | * | ||
122 | * Assumptions: | ||
123 | * - chunk[0]->position points to pages[0] at an offset of 0 | ||
124 | * - pages[] is not physically or virtually contiguous and consists of | ||
125 | * PAGE_SIZE elements. | ||
126 | * | ||
127 | * Output: | ||
128 | * - sge array pointing into pages[] array. | ||
129 | * - chunk_sge array specifying sge index and count for each | ||
130 | * chunk in the read list | ||
131 | * | ||
132 | */ | ||
133 | static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, | ||
134 | struct svc_rqst *rqstp, | ||
135 | struct svc_rdma_op_ctxt *head, | ||
136 | struct rpcrdma_msg *rmsgp, | ||
137 | struct ib_sge *sge, | ||
138 | struct chunk_sge *ch_sge_ary, | ||
139 | int ch_count, | ||
140 | int byte_count) | ||
141 | { | ||
142 | int sge_no; | ||
143 | int sge_bytes; | ||
144 | int page_off; | ||
145 | int page_no; | ||
146 | int ch_bytes; | ||
147 | int ch_no; | ||
148 | struct rpcrdma_read_chunk *ch; | ||
149 | |||
150 | sge_no = 0; | ||
151 | page_no = 0; | ||
152 | page_off = 0; | ||
153 | ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
154 | ch_no = 0; | ||
155 | ch_bytes = ch->rc_target.rs_length; | ||
156 | head->arg.head[0] = rqstp->rq_arg.head[0]; | ||
157 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | ||
158 | head->arg.pages = &head->pages[head->count]; | ||
159 | head->sge[0].length = head->count; /* save count of hdr pages */ | ||
160 | head->arg.page_base = 0; | ||
161 | head->arg.page_len = ch_bytes; | ||
162 | head->arg.len = rqstp->rq_arg.len + ch_bytes; | ||
163 | head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; | ||
164 | head->count++; | ||
165 | ch_sge_ary[0].start = 0; | ||
166 | while (byte_count) { | ||
167 | sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); | ||
168 | sge[sge_no].addr = | ||
169 | ib_dma_map_page(xprt->sc_cm_id->device, | ||
170 | rqstp->rq_arg.pages[page_no], | ||
171 | page_off, sge_bytes, | ||
172 | DMA_FROM_DEVICE); | ||
173 | sge[sge_no].length = sge_bytes; | ||
174 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
175 | /* | ||
176 | * Don't bump head->count here because the same page | ||
177 | * may be used by multiple SGE. | ||
178 | */ | ||
179 | head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; | ||
180 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; | ||
181 | |||
182 | byte_count -= sge_bytes; | ||
183 | ch_bytes -= sge_bytes; | ||
184 | sge_no++; | ||
185 | /* | ||
186 | * If all bytes for this chunk have been mapped to an | ||
187 | * SGE, move to the next SGE | ||
188 | */ | ||
189 | if (ch_bytes == 0) { | ||
190 | ch_sge_ary[ch_no].count = | ||
191 | sge_no - ch_sge_ary[ch_no].start; | ||
192 | ch_no++; | ||
193 | ch++; | ||
194 | ch_sge_ary[ch_no].start = sge_no; | ||
195 | ch_bytes = ch->rc_target.rs_length; | ||
196 | /* If bytes remaining account for next chunk */ | ||
197 | if (byte_count) { | ||
198 | head->arg.page_len += ch_bytes; | ||
199 | head->arg.len += ch_bytes; | ||
200 | head->arg.buflen += ch_bytes; | ||
201 | } | ||
202 | } | ||
203 | /* | ||
204 | * If this SGE consumed all of the page, move to the | ||
205 | * next page | ||
206 | */ | ||
207 | if ((sge_bytes + page_off) == PAGE_SIZE) { | ||
208 | page_no++; | ||
209 | page_off = 0; | ||
210 | /* | ||
211 | * If there are still bytes left to map, bump | ||
212 | * the page count | ||
213 | */ | ||
214 | if (byte_count) | ||
215 | head->count++; | ||
216 | } else | ||
217 | page_off += sge_bytes; | ||
218 | } | ||
219 | BUG_ON(byte_count != 0); | ||
220 | return sge_no; | ||
221 | } | ||
222 | |||
223 | static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, | ||
224 | struct ib_sge *sge, | ||
225 | u64 *sgl_offset, | ||
226 | int count) | ||
227 | { | ||
228 | int i; | ||
229 | |||
230 | ctxt->count = count; | ||
231 | for (i = 0; i < count; i++) { | ||
232 | ctxt->sge[i].addr = sge[i].addr; | ||
233 | ctxt->sge[i].length = sge[i].length; | ||
234 | *sgl_offset = *sgl_offset + sge[i].length; | ||
235 | } | ||
236 | } | ||
237 | |||
238 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) | ||
239 | { | ||
240 | #ifdef RDMA_TRANSPORT_IWARP | ||
241 | if ((RDMA_TRANSPORT_IWARP == | ||
242 | rdma_node_get_transport(xprt->sc_cm_id-> | ||
243 | device->node_type)) | ||
244 | && sge_count > 1) | ||
245 | return 1; | ||
246 | else | ||
247 | #endif | ||
248 | return min_t(int, sge_count, xprt->sc_max_sge); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Use RDMA_READ to read data from the advertised client buffer into the | ||
253 | * XDR stream starting at rq_arg.head[0].iov_base. | ||
254 | * Each chunk in the array | ||
255 | * contains the following fields: | ||
256 | * discrim - '1', This isn't used for data placement | ||
257 | * position - The xdr stream offset (the same for every chunk) | ||
258 | * handle - RMR for client memory region | ||
259 | * length - data transfer length | ||
260 | * offset - 64 bit tagged offset in remote memory region | ||
261 | * | ||
262 | * On our side, we need to read into a pagelist. The first page immediately | ||
263 | * follows the RPC header. | ||
264 | * | ||
265 | * This function returns 1 to indicate success. The data is not yet in | ||
266 | * the pagelist and therefore the RPC request must be deferred. The | ||
267 | * I/O completion will enqueue the transport again and | ||
268 | * svc_rdma_recvfrom will complete the request. | ||
269 | * | ||
270 | * NOTE: The ctxt must not be touched after the last WR has been posted | ||
271 | * because the I/O completion processing may occur on another | ||
272 | * processor and free / modify the context. Do not touch! | ||
273 | */ | ||
274 | static int rdma_read_xdr(struct svcxprt_rdma *xprt, | ||
275 | struct rpcrdma_msg *rmsgp, | ||
276 | struct svc_rqst *rqstp, | ||
277 | struct svc_rdma_op_ctxt *hdr_ctxt) | ||
278 | { | ||
279 | struct ib_send_wr read_wr; | ||
280 | int err = 0; | ||
281 | int ch_no; | ||
282 | struct ib_sge *sge; | ||
283 | int ch_count; | ||
284 | int byte_count; | ||
285 | int sge_count; | ||
286 | u64 sgl_offset; | ||
287 | struct rpcrdma_read_chunk *ch; | ||
288 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
289 | struct svc_rdma_op_ctxt *head; | ||
290 | struct svc_rdma_op_ctxt *tmp_sge_ctxt; | ||
291 | struct svc_rdma_op_ctxt *tmp_ch_ctxt; | ||
292 | struct chunk_sge *ch_sge_ary; | ||
293 | |||
294 | /* If no read list is present, return 0 */ | ||
295 | ch = svc_rdma_get_read_chunk(rmsgp); | ||
296 | if (!ch) | ||
297 | return 0; | ||
298 | |||
299 | /* Allocate temporary contexts to keep SGE */ | ||
300 | BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge)); | ||
301 | tmp_sge_ctxt = svc_rdma_get_context(xprt); | ||
302 | sge = tmp_sge_ctxt->sge; | ||
303 | tmp_ch_ctxt = svc_rdma_get_context(xprt); | ||
304 | ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge; | ||
305 | |||
306 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); | ||
307 | sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, | ||
308 | sge, ch_sge_ary, | ||
309 | ch_count, byte_count); | ||
310 | head = svc_rdma_get_context(xprt); | ||
311 | sgl_offset = 0; | ||
312 | ch_no = 0; | ||
313 | |||
314 | for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
315 | ch->rc_discrim != 0; ch++, ch_no++) { | ||
316 | next_sge: | ||
317 | if (!ctxt) | ||
318 | ctxt = head; | ||
319 | else { | ||
320 | ctxt->next = svc_rdma_get_context(xprt); | ||
321 | ctxt = ctxt->next; | ||
322 | } | ||
323 | ctxt->next = NULL; | ||
324 | ctxt->direction = DMA_FROM_DEVICE; | ||
325 | clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); | ||
326 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | ||
327 | if ((ch+1)->rc_discrim == 0) { | ||
328 | /* | ||
329 | * Checked in sq_cq_reap to see if we need to | ||
330 | * be enqueued | ||
331 | */ | ||
332 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | ||
333 | ctxt->next = hdr_ctxt; | ||
334 | hdr_ctxt->next = head; | ||
335 | } | ||
336 | |||
337 | /* Prepare READ WR */ | ||
338 | memset(&read_wr, 0, sizeof read_wr); | ||
339 | ctxt->wr_op = IB_WR_RDMA_READ; | ||
340 | read_wr.wr_id = (unsigned long)ctxt; | ||
341 | read_wr.opcode = IB_WR_RDMA_READ; | ||
342 | read_wr.send_flags = IB_SEND_SIGNALED; | ||
343 | read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; | ||
344 | read_wr.wr.rdma.remote_addr = | ||
345 | get_unaligned(&(ch->rc_target.rs_offset)) + | ||
346 | sgl_offset; | ||
347 | read_wr.sg_list = &sge[ch_sge_ary[ch_no].start]; | ||
348 | read_wr.num_sge = | ||
349 | rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count); | ||
350 | rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start], | ||
351 | &sgl_offset, | ||
352 | read_wr.num_sge); | ||
353 | |||
354 | /* Post the read */ | ||
355 | err = svc_rdma_send(xprt, &read_wr); | ||
356 | if (err) { | ||
357 | printk(KERN_ERR "svcrdma: Error posting send = %d\n", | ||
358 | err); | ||
359 | /* | ||
360 | * Break the circular list so free knows when | ||
361 | * to stop if the error happened to occur on | ||
362 | * the last read | ||
363 | */ | ||
364 | ctxt->next = NULL; | ||
365 | goto out; | ||
366 | } | ||
367 | atomic_inc(&rdma_stat_read); | ||
368 | |||
369 | if (read_wr.num_sge < ch_sge_ary[ch_no].count) { | ||
370 | ch_sge_ary[ch_no].count -= read_wr.num_sge; | ||
371 | ch_sge_ary[ch_no].start += read_wr.num_sge; | ||
372 | goto next_sge; | ||
373 | } | ||
374 | sgl_offset = 0; | ||
375 | err = 0; | ||
376 | } | ||
377 | |||
378 | out: | ||
379 | svc_rdma_put_context(tmp_sge_ctxt, 0); | ||
380 | svc_rdma_put_context(tmp_ch_ctxt, 0); | ||
381 | |||
382 | /* Detach arg pages. svc_recv will replenish them */ | ||
383 | for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) | ||
384 | rqstp->rq_pages[ch_no] = NULL; | ||
385 | |||
386 | /* | ||
387 | * Detach res pages. svc_release must see a resused count of | ||
388 | * zero or it will attempt to put them. | ||
389 | */ | ||
390 | while (rqstp->rq_resused) | ||
391 | rqstp->rq_respages[--rqstp->rq_resused] = NULL; | ||
392 | |||
393 | if (err) { | ||
394 | printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err); | ||
395 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
396 | /* Free the linked list of read contexts */ | ||
397 | while (head != NULL) { | ||
398 | ctxt = head->next; | ||
399 | svc_rdma_put_context(head, 1); | ||
400 | head = ctxt; | ||
401 | } | ||
402 | return 0; | ||
403 | } | ||
404 | |||
405 | return 1; | ||
406 | } | ||
407 | |||
408 | static int rdma_read_complete(struct svc_rqst *rqstp, | ||
409 | struct svc_rdma_op_ctxt *data) | ||
410 | { | ||
411 | struct svc_rdma_op_ctxt *head = data->next; | ||
412 | int page_no; | ||
413 | int ret; | ||
414 | |||
415 | BUG_ON(!head); | ||
416 | |||
417 | /* Copy RPC pages */ | ||
418 | for (page_no = 0; page_no < head->count; page_no++) { | ||
419 | put_page(rqstp->rq_pages[page_no]); | ||
420 | rqstp->rq_pages[page_no] = head->pages[page_no]; | ||
421 | } | ||
422 | /* Point rq_arg.pages past header */ | ||
423 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length]; | ||
424 | rqstp->rq_arg.page_len = head->arg.page_len; | ||
425 | rqstp->rq_arg.page_base = head->arg.page_base; | ||
426 | |||
427 | /* rq_respages starts after the last arg page */ | ||
428 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; | ||
429 | rqstp->rq_resused = 0; | ||
430 | |||
431 | /* Rebuild rq_arg head and tail. */ | ||
432 | rqstp->rq_arg.head[0] = head->arg.head[0]; | ||
433 | rqstp->rq_arg.tail[0] = head->arg.tail[0]; | ||
434 | rqstp->rq_arg.len = head->arg.len; | ||
435 | rqstp->rq_arg.buflen = head->arg.buflen; | ||
436 | |||
437 | /* XXX: What should this be? */ | ||
438 | rqstp->rq_prot = IPPROTO_MAX; | ||
439 | |||
440 | /* | ||
441 | * Free the contexts we used to build the RDMA_READ. We have | ||
442 | * to be careful here because the context list uses the same | ||
443 | * next pointer used to chain the contexts associated with the | ||
444 | * RDMA_READ | ||
445 | */ | ||
446 | data->next = NULL; /* terminate circular list */ | ||
447 | do { | ||
448 | data = head->next; | ||
449 | svc_rdma_put_context(head, 0); | ||
450 | head = data; | ||
451 | } while (head != NULL); | ||
452 | |||
453 | ret = rqstp->rq_arg.head[0].iov_len | ||
454 | + rqstp->rq_arg.page_len | ||
455 | + rqstp->rq_arg.tail[0].iov_len; | ||
456 | dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, " | ||
457 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | ||
458 | ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, | ||
459 | rqstp->rq_arg.head[0].iov_len); | ||
460 | |||
461 | /* Indicate that we've consumed an RQ credit */ | ||
462 | rqstp->rq_xprt_ctxt = rqstp->rq_xprt; | ||
463 | svc_xprt_received(rqstp->rq_xprt); | ||
464 | return ret; | ||
465 | } | ||
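
The cleanup in rdma_read_complete() frees contexts that were deliberately chained into a circle while the RDMA_READ was outstanding: the circle is broken first, then the list is walked with a grab-next-before-free loop. A small userspace sketch of the same pattern, with an illustrative node type:

/* Sketch only: break a circular singly linked list, then free it. */
#include <stdlib.h>

struct node { struct node *next; };

static void free_ring(struct node *tail, struct node *head)
{
	struct node *next;

	tail->next = NULL;		/* terminate the circular list */
	while (head != NULL) {
		next = head->next;	/* save before freeing */
		free(head);
		head = next;
	}
}

int main(void)
{
	struct node *a = malloc(sizeof(*a)), *b = malloc(sizeof(*b));

	a->next = b;
	b->next = a;			/* circular */
	free_ring(b, a);
	return 0;
}
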
466 | |||
467 | /* | ||
468 | * Set up the rqstp thread context to point to the RQ buffer. If | ||
469 | * necessary, pull additional data from the client with an RDMA_READ | ||
470 | * request. | ||
471 | */ | ||
472 | int svc_rdma_recvfrom(struct svc_rqst *rqstp) | ||
473 | { | ||
474 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
475 | struct svcxprt_rdma *rdma_xprt = | ||
476 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
477 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
478 | struct rpcrdma_msg *rmsgp; | ||
479 | int ret = 0; | ||
480 | int len; | ||
481 | |||
482 | dprintk("svcrdma: rqstp=%p\n", rqstp); | ||
483 | |||
484 | /* | ||
485 | * The rq_xprt_ctxt indicates if we've consumed an RQ credit | ||
486 | * or not. It is used in the rdma xpo_release_rqst function to | ||
487 | * determine whether or not to return an RQ WQE to the RQ. | ||
488 | */ | ||
489 | rqstp->rq_xprt_ctxt = NULL; | ||
490 | |||
491 | spin_lock_bh(&rdma_xprt->sc_read_complete_lock); | ||
492 | if (!list_empty(&rdma_xprt->sc_read_complete_q)) { | ||
493 | ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, | ||
494 | struct svc_rdma_op_ctxt, | ||
495 | dto_q); | ||
496 | list_del_init(&ctxt->dto_q); | ||
497 | } | ||
498 | spin_unlock_bh(&rdma_xprt->sc_read_complete_lock); | ||
499 | if (ctxt) | ||
500 | return rdma_read_complete(rqstp, ctxt); | ||
501 | |||
502 | spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); | ||
503 | if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { | ||
504 | ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, | ||
505 | struct svc_rdma_op_ctxt, | ||
506 | dto_q); | ||
507 | list_del_init(&ctxt->dto_q); | ||
508 | } else { | ||
509 | atomic_inc(&rdma_stat_rq_starve); | ||
510 | clear_bit(XPT_DATA, &xprt->xpt_flags); | ||
511 | ctxt = NULL; | ||
512 | } | ||
513 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); | ||
514 | if (!ctxt) { | ||
515 | /* This is the EAGAIN path. The svc_recv routine will | ||
516 | * return -EAGAIN, the nfsd thread will call into | ||
517 | * svc_recv again, and we should not be on the active | ||
518 | * transport list. | ||
519 | */ | ||
520 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | ||
521 | goto close_out; | ||
522 | |||
523 | BUG_ON(ret); | ||
524 | goto out; | ||
525 | } | ||
526 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", | ||
527 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); | ||
528 | BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); | ||
529 | atomic_inc(&rdma_stat_recv); | ||
530 | |||
531 | /* Build up the XDR from the receive buffers. */ | ||
532 | rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); | ||
533 | |||
534 | /* Decode the RDMA header. */ | ||
535 | len = svc_rdma_xdr_decode_req(&rmsgp, rqstp); | ||
536 | rqstp->rq_xprt_hlen = len; | ||
537 | |||
538 | /* If the request is invalid, reply with an error */ | ||
539 | if (len < 0) { | ||
540 | if (len == -ENOSYS) | ||
541 | (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); | ||
542 | goto close_out; | ||
543 | } | ||
544 | |||
545 | /* Read read-list data. If we would need to wait, defer | ||
546 | * it. Note that in this case, we don't return the RQ credit | ||
547 | * until after the read completes. | ||
548 | */ | ||
549 | if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) { | ||
550 | svc_xprt_received(xprt); | ||
551 | return 0; | ||
552 | } | ||
553 | |||
554 | /* Indicate we've consumed an RQ credit */ | ||
555 | rqstp->rq_xprt_ctxt = rqstp->rq_xprt; | ||
556 | |||
557 | ret = rqstp->rq_arg.head[0].iov_len | ||
558 | + rqstp->rq_arg.page_len | ||
559 | + rqstp->rq_arg.tail[0].iov_len; | ||
560 | svc_rdma_put_context(ctxt, 0); | ||
561 | out: | ||
562 | dprintk("svcrdma: ret = %d, rq_arg.len =%d, " | ||
563 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | ||
564 | ret, rqstp->rq_arg.len, | ||
565 | rqstp->rq_arg.head[0].iov_base, | ||
566 | rqstp->rq_arg.head[0].iov_len); | ||
567 | rqstp->rq_prot = IPPROTO_MAX; | ||
568 | svc_xprt_copy_addrs(rqstp, xprt); | ||
569 | svc_xprt_received(xprt); | ||
570 | return ret; | ||
571 | |||
572 | close_out: | ||
573 | if (ctxt) { | ||
574 | svc_rdma_put_context(ctxt, 1); | ||
575 | /* Indicate we've consumed an RQ credit */ | ||
576 | rqstp->rq_xprt_ctxt = rqstp->rq_xprt; | ||
577 | } | ||
578 | dprintk("svcrdma: transport %p is closing\n", xprt); | ||
579 | /* | ||
580 | * Set the close bit and enqueue it. svc_recv will see the | ||
581 | * close bit and call svc_xprt_delete | ||
582 | */ | ||
583 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
584 | svc_xprt_received(xprt); | ||
585 | return 0; | ||
586 | } | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c new file mode 100644 index 000000000000..3e321949e1dc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
@@ -0,0 +1,520 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/debug.h> | ||
43 | #include <linux/sunrpc/rpc_rdma.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | #include <asm/unaligned.h> | ||
46 | #include <rdma/ib_verbs.h> | ||
47 | #include <rdma/rdma_cm.h> | ||
48 | #include <linux/sunrpc/svc_rdma.h> | ||
49 | |||
50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
51 | |||
52 | /* Encode an XDR as an array of IB SGE | ||
53 | * | ||
54 | * Assumptions: | ||
55 | * - head[0] is physically contiguous. | ||
56 | * - tail[0] is physically contiguous. | ||
57 | * - pages[] is not physically or virtually contiguous and consists of | ||
58 | * PAGE_SIZE elements. | ||
59 | * | ||
60 | * Output: | ||
61 | * SGE[0] reserved for RPCRDMA header | ||
62 | * SGE[1] data from xdr->head[] | ||
63 | * SGE[2..sge_count-2] data from xdr->pages[] | ||
64 | * SGE[sge_count-1] data from xdr->tail. | ||
65 | * | ||
66 | */ | ||
67 | static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, | ||
68 | struct xdr_buf *xdr, | ||
69 | struct ib_sge *sge, | ||
70 | int *sge_count) | ||
71 | { | ||
72 | /* Max we need is the length of the XDR / pagesize + one for | ||
73 | * head + one for tail + one for RPCRDMA header | ||
74 | */ | ||
75 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; | ||
76 | int sge_no; | ||
77 | u32 byte_count = xdr->len; | ||
78 | u32 sge_bytes; | ||
79 | u32 page_bytes; | ||
80 | int page_off; | ||
81 | int page_no; | ||
82 | |||
83 | /* Skip the first sge, this is for the RPCRDMA header */ | ||
84 | sge_no = 1; | ||
85 | |||
86 | /* Head SGE */ | ||
87 | sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, | ||
88 | xdr->head[0].iov_base, | ||
89 | xdr->head[0].iov_len, | ||
90 | DMA_TO_DEVICE); | ||
91 | sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len); | ||
92 | byte_count -= sge_bytes; | ||
93 | sge[sge_no].length = sge_bytes; | ||
94 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
95 | sge_no++; | ||
96 | |||
97 | /* pages SGE */ | ||
98 | page_no = 0; | ||
99 | page_bytes = xdr->page_len; | ||
100 | page_off = xdr->page_base; | ||
101 | while (byte_count && page_bytes) { | ||
102 | sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); | ||
103 | sge[sge_no].addr = | ||
104 | ib_dma_map_page(xprt->sc_cm_id->device, | ||
105 | xdr->pages[page_no], page_off, | ||
106 | sge_bytes, DMA_TO_DEVICE); | ||
107 | sge_bytes = min(sge_bytes, page_bytes); | ||
108 | byte_count -= sge_bytes; | ||
109 | page_bytes -= sge_bytes; | ||
110 | sge[sge_no].length = sge_bytes; | ||
111 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
112 | |||
113 | sge_no++; | ||
114 | page_no++; | ||
115 | page_off = 0; /* reset for next time through loop */ | ||
116 | } | ||
117 | |||
118 | /* Tail SGE */ | ||
119 | if (byte_count && xdr->tail[0].iov_len) { | ||
120 | sge[sge_no].addr = | ||
121 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
122 | xdr->tail[0].iov_base, | ||
123 | xdr->tail[0].iov_len, | ||
124 | DMA_TO_DEVICE); | ||
125 | sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len); | ||
126 | byte_count -= sge_bytes; | ||
127 | sge[sge_no].length = sge_bytes; | ||
128 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
129 | sge_no++; | ||
130 | } | ||
131 | |||
132 | BUG_ON(sge_no > sge_max); | ||
133 | BUG_ON(byte_count != 0); | ||
134 | |||
135 | *sge_count = sge_no; | ||
136 | return sge; | ||
137 | } | ||
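
The sge_max bound used by xdr_to_sge() is one SGE for the RPCRDMA header, one for head[], one for tail[], plus at most one SGE per page of the page list. A tiny userspace sketch of that arithmetic, assuming a 4 KB PAGE_SIZE:

/* Sketch: worst-case SGE count for an XDR buffer of xdr_len bytes. */
#include <stdio.h>

#define PAGE_SIZE 4096

static int sge_max(int xdr_len)
{
	return (xdr_len + PAGE_SIZE - 1) / PAGE_SIZE + 3;
}

int main(void)
{
	printf("%d\n", sge_max(100));	/* 4: header + head + 1 page + tail */
	printf("%d\n", sge_max(8192));	/* 5 */
	return 0;
}
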
138 | |||
139 | |||
140 | /* Assumptions: | ||
141 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE | ||
142 | */ | ||
143 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | ||
144 | u32 rmr, u64 to, | ||
145 | u32 xdr_off, int write_len, | ||
146 | struct ib_sge *xdr_sge, int sge_count) | ||
147 | { | ||
148 | struct svc_rdma_op_ctxt *tmp_sge_ctxt; | ||
149 | struct ib_send_wr write_wr; | ||
150 | struct ib_sge *sge; | ||
151 | int xdr_sge_no; | ||
152 | int sge_no; | ||
153 | int sge_bytes; | ||
154 | int sge_off; | ||
155 | int bc; | ||
156 | struct svc_rdma_op_ctxt *ctxt; | ||
157 | int ret = 0; | ||
158 | |||
159 | BUG_ON(sge_count >= 32); | ||
160 | dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " | ||
161 | "write_len=%d, xdr_sge=%p, sge_count=%d\n", | ||
162 | rmr, to, xdr_off, write_len, xdr_sge, sge_count); | ||
163 | |||
164 | ctxt = svc_rdma_get_context(xprt); | ||
165 | ctxt->count = 0; | ||
166 | tmp_sge_ctxt = svc_rdma_get_context(xprt); | ||
167 | sge = tmp_sge_ctxt->sge; | ||
168 | |||
169 | /* Find the SGE associated with xdr_off */ | ||
170 | for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; | ||
171 | xdr_sge_no++) { | ||
172 | if (xdr_sge[xdr_sge_no].length > bc) | ||
173 | break; | ||
174 | bc -= xdr_sge[xdr_sge_no].length; | ||
175 | } | ||
176 | |||
177 | sge_off = bc; | ||
178 | bc = write_len; | ||
179 | sge_no = 0; | ||
180 | |||
181 | /* Copy the remaining SGE */ | ||
182 | while (bc != 0 && xdr_sge_no < sge_count) { | ||
183 | sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; | ||
184 | sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey; | ||
185 | sge_bytes = min((size_t)bc, | ||
186 | (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); | ||
187 | sge[sge_no].length = sge_bytes; | ||
188 | |||
189 | sge_off = 0; | ||
190 | sge_no++; | ||
191 | xdr_sge_no++; | ||
192 | bc -= sge_bytes; | ||
193 | } | ||
194 | |||
195 | BUG_ON(bc != 0); | ||
196 | BUG_ON(xdr_sge_no > sge_count); | ||
197 | |||
198 | /* Prepare WRITE WR */ | ||
199 | memset(&write_wr, 0, sizeof write_wr); | ||
200 | ctxt->wr_op = IB_WR_RDMA_WRITE; | ||
201 | write_wr.wr_id = (unsigned long)ctxt; | ||
202 | write_wr.sg_list = &sge[0]; | ||
203 | write_wr.num_sge = sge_no; | ||
204 | write_wr.opcode = IB_WR_RDMA_WRITE; | ||
205 | write_wr.send_flags = IB_SEND_SIGNALED; | ||
206 | write_wr.wr.rdma.rkey = rmr; | ||
207 | write_wr.wr.rdma.remote_addr = to; | ||
208 | |||
209 | /* Post It */ | ||
210 | atomic_inc(&rdma_stat_write); | ||
211 | if (svc_rdma_send(xprt, &write_wr)) { | ||
212 | svc_rdma_put_context(ctxt, 1); | ||
213 | /* Fatal error, close transport */ | ||
214 | ret = -EIO; | ||
215 | } | ||
216 | svc_rdma_put_context(tmp_sge_ctxt, 0); | ||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | static int send_write_chunks(struct svcxprt_rdma *xprt, | ||
221 | struct rpcrdma_msg *rdma_argp, | ||
222 | struct rpcrdma_msg *rdma_resp, | ||
223 | struct svc_rqst *rqstp, | ||
224 | struct ib_sge *sge, | ||
225 | int sge_count) | ||
226 | { | ||
227 | u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; | ||
228 | int write_len; | ||
229 | int max_write; | ||
230 | u32 xdr_off; | ||
231 | int chunk_off; | ||
232 | int chunk_no; | ||
233 | struct rpcrdma_write_array *arg_ary; | ||
234 | struct rpcrdma_write_array *res_ary; | ||
235 | int ret; | ||
236 | |||
237 | arg_ary = svc_rdma_get_write_array(rdma_argp); | ||
238 | if (!arg_ary) | ||
239 | return 0; | ||
240 | res_ary = (struct rpcrdma_write_array *) | ||
241 | &rdma_resp->rm_body.rm_chunks[1]; | ||
242 | |||
243 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
244 | |||
245 | /* Write chunks start at the pagelist */ | ||
246 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; | ||
247 | xfer_len && chunk_no < arg_ary->wc_nchunks; | ||
248 | chunk_no++) { | ||
249 | struct rpcrdma_segment *arg_ch; | ||
250 | u64 rs_offset; | ||
251 | |||
252 | arg_ch = &arg_ary->wc_array[chunk_no].wc_target; | ||
253 | write_len = min(xfer_len, arg_ch->rs_length); | ||
254 | |||
255 | /* Prepare the response chunk given the length actually | ||
256 | * written */ | ||
257 | rs_offset = get_unaligned(&(arg_ch->rs_offset)); | ||
258 | svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, | ||
259 | arg_ch->rs_handle, | ||
260 | rs_offset, | ||
261 | write_len); | ||
262 | chunk_off = 0; | ||
263 | while (write_len) { | ||
264 | int this_write; | ||
265 | this_write = min(write_len, max_write); | ||
266 | ret = send_write(xprt, rqstp, | ||
267 | arg_ch->rs_handle, | ||
268 | rs_offset + chunk_off, | ||
269 | xdr_off, | ||
270 | this_write, | ||
271 | sge, | ||
272 | sge_count); | ||
273 | if (ret) { | ||
274 | dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", | ||
275 | ret); | ||
276 | return -EIO; | ||
277 | } | ||
278 | chunk_off += this_write; | ||
279 | xdr_off += this_write; | ||
280 | xfer_len -= this_write; | ||
281 | write_len -= this_write; | ||
282 | } | ||
283 | } | ||
284 | /* Update the req with the number of chunks actually used */ | ||
285 | svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no); | ||
286 | |||
287 | return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; | ||
288 | } | ||
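
Each write chunk above is pushed to the client as a series of RDMA_WRITEs no larger than max_write (sc_max_sge * PAGE_SIZE), advancing both the offset into the XDR buffer and the offset into the remote chunk. A simplified userspace model of that inner loop, with illustrative sizes:

#include <stdio.h>

int main(void)
{
	unsigned int xfer_len = 10000;	/* bytes left to move overall */
	unsigned int write_len = 10000;	/* bytes covered by this chunk */
	unsigned int max_write = 4096;	/* per-WR limit */
	unsigned int xdr_off = 0, chunk_off = 0;

	while (write_len) {
		unsigned int this_write =
			write_len < max_write ? write_len : max_write;

		printf("RDMA_WRITE %u bytes, xdr_off=%u, chunk_off=%u\n",
		       this_write, xdr_off, chunk_off);
		chunk_off += this_write;	/* offset into remote chunk */
		xdr_off += this_write;		/* offset into local XDR */
		xfer_len -= this_write;
		write_len -= this_write;
	}
	printf("%u bytes left overall\n", xfer_len);
	return 0;
}
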
289 | |||
290 | static int send_reply_chunks(struct svcxprt_rdma *xprt, | ||
291 | struct rpcrdma_msg *rdma_argp, | ||
292 | struct rpcrdma_msg *rdma_resp, | ||
293 | struct svc_rqst *rqstp, | ||
294 | struct ib_sge *sge, | ||
295 | int sge_count) | ||
296 | { | ||
297 | u32 xfer_len = rqstp->rq_res.len; | ||
298 | int write_len; | ||
299 | int max_write; | ||
300 | u32 xdr_off; | ||
301 | int chunk_no; | ||
302 | int chunk_off; | ||
303 | struct rpcrdma_segment *ch; | ||
304 | struct rpcrdma_write_array *arg_ary; | ||
305 | struct rpcrdma_write_array *res_ary; | ||
306 | int ret; | ||
307 | |||
308 | arg_ary = svc_rdma_get_reply_array(rdma_argp); | ||
309 | if (!arg_ary) | ||
310 | return 0; | ||
311 | /* XXX: need to fix when reply lists occur with read-list and/or | ||
312 | * write-list */ | ||
313 | res_ary = (struct rpcrdma_write_array *) | ||
314 | &rdma_resp->rm_body.rm_chunks[2]; | ||
315 | |||
316 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
317 | |||
318 | /* xdr offset starts at RPC message */ | ||
319 | for (xdr_off = 0, chunk_no = 0; | ||
320 | xfer_len && chunk_no < arg_ary->wc_nchunks; | ||
321 | chunk_no++) { | ||
322 | u64 rs_offset; | ||
323 | ch = &arg_ary->wc_array[chunk_no].wc_target; | ||
324 | write_len = min(xfer_len, ch->rs_length); | ||
325 | |||
326 | |||
327 | /* Prepare the reply chunk given the length actually | ||
328 | * written */ | ||
329 | rs_offset = get_unaligned(&(ch->rs_offset)); | ||
330 | svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, | ||
331 | ch->rs_handle, rs_offset, | ||
332 | write_len); | ||
333 | chunk_off = 0; | ||
334 | while (write_len) { | ||
335 | int this_write; | ||
336 | |||
337 | this_write = min(write_len, max_write); | ||
338 | ret = send_write(xprt, rqstp, | ||
339 | ch->rs_handle, | ||
340 | rs_offset + chunk_off, | ||
341 | xdr_off, | ||
342 | this_write, | ||
343 | sge, | ||
344 | sge_count); | ||
345 | if (ret) { | ||
346 | dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", | ||
347 | ret); | ||
348 | return -EIO; | ||
349 | } | ||
350 | chunk_off += this_write; | ||
351 | xdr_off += this_write; | ||
352 | xfer_len -= this_write; | ||
353 | write_len -= this_write; | ||
354 | } | ||
355 | } | ||
356 | /* Update the req with the number of chunks actually used */ | ||
357 | svc_rdma_xdr_encode_reply_array(res_ary, chunk_no); | ||
358 | |||
359 | return rqstp->rq_res.len; | ||
360 | } | ||
361 | |||
362 | /* This function prepares the portion of the RPCRDMA message to be | ||
363 | * sent in the RDMA_SEND. This function is called after data sent via | ||
364 | * RDMA has already been transmitted. There are three cases: | ||
365 | * - The RPCRDMA header, RPC header, and payload are all sent in a | ||
366 | * single RDMA_SEND. This is the "inline" case. | ||
367 | * - The RPCRDMA header and some portion of the RPC header and data | ||
368 | * are sent via this RDMA_SEND and another portion of the data is | ||
369 | * sent via RDMA. | ||
370 | * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC | ||
371 | * header and data are all transmitted via RDMA. | ||
372 | * In all three cases, this function prepares the RPCRDMA header in | ||
373 | * sge[0], the 'type' parameter indicates the type to place in the | ||
374 | * RPCRDMA header, and the 'byte_count' field indicates how much of | ||
375 | * the XDR to include in this RDMA_SEND. | ||
376 | */ | ||
377 | static int send_reply(struct svcxprt_rdma *rdma, | ||
378 | struct svc_rqst *rqstp, | ||
379 | struct page *page, | ||
380 | struct rpcrdma_msg *rdma_resp, | ||
381 | struct svc_rdma_op_ctxt *ctxt, | ||
382 | int sge_count, | ||
383 | int byte_count) | ||
384 | { | ||
385 | struct ib_send_wr send_wr; | ||
386 | int sge_no; | ||
387 | int sge_bytes; | ||
388 | int page_no; | ||
389 | int ret; | ||
390 | |||
391 | /* Prepare the context */ | ||
392 | ctxt->pages[0] = page; | ||
393 | ctxt->count = 1; | ||
394 | |||
395 | /* Prepare the SGE for the RPCRDMA Header */ | ||
396 | ctxt->sge[0].addr = | ||
397 | ib_dma_map_page(rdma->sc_cm_id->device, | ||
398 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); | ||
399 | ctxt->direction = DMA_TO_DEVICE; | ||
400 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); | ||
401 | ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; | ||
402 | |||
403 | /* Determine how many of our SGE are to be transmitted */ | ||
404 | for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { | ||
405 | sge_bytes = min((size_t)ctxt->sge[sge_no].length, | ||
406 | (size_t)byte_count); | ||
407 | byte_count -= sge_bytes; | ||
408 | } | ||
409 | BUG_ON(byte_count != 0); | ||
410 | |||
411 | /* Save all respages in the ctxt and remove them from the | ||
412 | * respages array. They are our pages until the I/O | ||
413 | * completes. | ||
414 | */ | ||
415 | for (page_no = 0; page_no < rqstp->rq_resused; page_no++) { | ||
416 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; | ||
417 | ctxt->count++; | ||
418 | rqstp->rq_respages[page_no] = NULL; | ||
419 | } | ||
420 | |||
421 | BUG_ON(sge_no > rdma->sc_max_sge); | ||
422 | memset(&send_wr, 0, sizeof send_wr); | ||
423 | ctxt->wr_op = IB_WR_SEND; | ||
424 | send_wr.wr_id = (unsigned long)ctxt; | ||
425 | send_wr.sg_list = ctxt->sge; | ||
426 | send_wr.num_sge = sge_no; | ||
427 | send_wr.opcode = IB_WR_SEND; | ||
428 | send_wr.send_flags = IB_SEND_SIGNALED; | ||
429 | |||
430 | ret = svc_rdma_send(rdma, &send_wr); | ||
431 | if (ret) | ||
432 | svc_rdma_put_context(ctxt, 1); | ||
433 | |||
434 | return ret; | ||
435 | } | ||
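
The loop in send_reply() that determines num_sge keeps taking whole SGEs after the header SGE until byte_count bytes of inline payload are covered. A standalone sketch of that counting, with made-up SGE lengths:

#include <stdio.h>

int main(void)
{
	unsigned int sge_len[] = { 0 /* header */, 128, 4096, 4096, 20 };
	unsigned int byte_count = 4300;	/* inline bytes to send */
	unsigned int sge_no;

	for (sge_no = 1; byte_count && sge_no < 5; sge_no++) {
		unsigned int b = sge_len[sge_no] < byte_count ?
				 sge_len[sge_no] : byte_count;
		byte_count -= b;
	}
	printf("num_sge = %u\n", sge_no);	/* 4: header + 128 + 4096 + 76 */
	return 0;
}
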
436 | |||
437 | void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) | ||
438 | { | ||
439 | } | ||
440 | |||
441 | /* | ||
442 | * Return the start of an xdr buffer. | ||
443 | */ | ||
444 | static void *xdr_start(struct xdr_buf *xdr) | ||
445 | { | ||
446 | return xdr->head[0].iov_base - | ||
447 | (xdr->len - | ||
448 | xdr->page_len - | ||
449 | xdr->tail[0].iov_len - | ||
450 | xdr->head[0].iov_len); | ||
451 | } | ||
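
xdr_start() recovers the transport header that sits in the same buffer immediately before head[0]: its length is len minus the head, page and tail lengths, so stepping back that many bytes from head[0].iov_base lands on the header. A userspace sketch of the pointer arithmetic, with a contrived layout:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[64];
	size_t hdr_len = 8, head_len = 16, page_len = 0, tail_len = 4;
	size_t total = hdr_len + head_len + page_len + tail_len; /* xdr->len */
	char *head = buf + hdr_len;	/* head[0].iov_base */
	char *start;

	memset(buf, 0, sizeof(buf));
	start = head - (total - page_len - tail_len - head_len);
	printf("header starts %td bytes before head[0]\n", head - start);
	return 0;
}
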
452 | |||
453 | int svc_rdma_sendto(struct svc_rqst *rqstp) | ||
454 | { | ||
455 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
456 | struct svcxprt_rdma *rdma = | ||
457 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
458 | struct rpcrdma_msg *rdma_argp; | ||
459 | struct rpcrdma_msg *rdma_resp; | ||
460 | struct rpcrdma_write_array *reply_ary; | ||
461 | enum rpcrdma_proc reply_type; | ||
462 | int ret; | ||
463 | int inline_bytes; | ||
464 | struct ib_sge *sge; | ||
465 | int sge_count = 0; | ||
466 | struct page *res_page; | ||
467 | struct svc_rdma_op_ctxt *ctxt; | ||
468 | |||
469 | dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); | ||
470 | |||
471 | /* Get the RDMA request header. */ | ||
472 | rdma_argp = xdr_start(&rqstp->rq_arg); | ||
473 | |||
474 | /* Build an SGE for the XDR */ | ||
475 | ctxt = svc_rdma_get_context(rdma); | ||
476 | ctxt->direction = DMA_TO_DEVICE; | ||
477 | sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); | ||
478 | |||
479 | inline_bytes = rqstp->rq_res.len; | ||
480 | |||
481 | /* Create the RDMA response header */ | ||
482 | res_page = svc_rdma_get_page(); | ||
483 | rdma_resp = page_address(res_page); | ||
484 | reply_ary = svc_rdma_get_reply_array(rdma_argp); | ||
485 | if (reply_ary) | ||
486 | reply_type = RDMA_NOMSG; | ||
487 | else | ||
488 | reply_type = RDMA_MSG; | ||
489 | svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, | ||
490 | rdma_resp, reply_type); | ||
491 | |||
492 | /* Send any write-chunk data and build resp write-list */ | ||
493 | ret = send_write_chunks(rdma, rdma_argp, rdma_resp, | ||
494 | rqstp, sge, sge_count); | ||
495 | if (ret < 0) { | ||
496 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", | ||
497 | ret); | ||
498 | goto error; | ||
499 | } | ||
500 | inline_bytes -= ret; | ||
501 | |||
502 | /* Send any reply-list data and update resp reply-list */ | ||
503 | ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, | ||
504 | rqstp, sge, sge_count); | ||
505 | if (ret < 0) { | ||
506 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", | ||
507 | ret); | ||
508 | goto error; | ||
509 | } | ||
510 | inline_bytes -= ret; | ||
511 | |||
512 | ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, | ||
513 | inline_bytes); | ||
514 | dprintk("svcrdma: send_reply returns %d\n", ret); | ||
515 | return ret; | ||
516 | error: | ||
517 | svc_rdma_put_context(ctxt, 0); | ||
518 | put_page(res_page); | ||
519 | return ret; | ||
520 | } | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c new file mode 100644 index 000000000000..f09444c451bc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
@@ -0,0 +1,1080 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/svc_xprt.h> | ||
43 | #include <linux/sunrpc/debug.h> | ||
44 | #include <linux/sunrpc/rpc_rdma.h> | ||
45 | #include <linux/spinlock.h> | ||
46 | #include <rdma/ib_verbs.h> | ||
47 | #include <rdma/rdma_cm.h> | ||
48 | #include <linux/sunrpc/svc_rdma.h> | ||
49 | |||
50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
51 | |||
52 | static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | ||
53 | struct sockaddr *sa, int salen, | ||
54 | int flags); | ||
55 | static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); | ||
56 | static void svc_rdma_release_rqst(struct svc_rqst *); | ||
57 | static void rdma_destroy_xprt(struct svcxprt_rdma *xprt); | ||
58 | static void dto_tasklet_func(unsigned long data); | ||
59 | static void svc_rdma_detach(struct svc_xprt *xprt); | ||
60 | static void svc_rdma_free(struct svc_xprt *xprt); | ||
61 | static int svc_rdma_has_wspace(struct svc_xprt *xprt); | ||
62 | static void rq_cq_reap(struct svcxprt_rdma *xprt); | ||
63 | static void sq_cq_reap(struct svcxprt_rdma *xprt); | ||
64 | |||
65 | DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL); | ||
66 | static DEFINE_SPINLOCK(dto_lock); | ||
67 | static LIST_HEAD(dto_xprt_q); | ||
68 | |||
69 | static struct svc_xprt_ops svc_rdma_ops = { | ||
70 | .xpo_create = svc_rdma_create, | ||
71 | .xpo_recvfrom = svc_rdma_recvfrom, | ||
72 | .xpo_sendto = svc_rdma_sendto, | ||
73 | .xpo_release_rqst = svc_rdma_release_rqst, | ||
74 | .xpo_detach = svc_rdma_detach, | ||
75 | .xpo_free = svc_rdma_free, | ||
76 | .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, | ||
77 | .xpo_has_wspace = svc_rdma_has_wspace, | ||
78 | .xpo_accept = svc_rdma_accept, | ||
79 | }; | ||
80 | |||
81 | struct svc_xprt_class svc_rdma_class = { | ||
82 | .xcl_name = "rdma", | ||
83 | .xcl_owner = THIS_MODULE, | ||
84 | .xcl_ops = &svc_rdma_ops, | ||
85 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, | ||
86 | }; | ||
87 | |||
88 | static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) | ||
89 | { | ||
90 | int target; | ||
91 | int at_least_one = 0; | ||
92 | struct svc_rdma_op_ctxt *ctxt; | ||
93 | |||
94 | target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump, | ||
95 | xprt->sc_ctxt_max); | ||
96 | |||
97 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
98 | while (xprt->sc_ctxt_cnt < target) { | ||
99 | xprt->sc_ctxt_cnt++; | ||
100 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
101 | |||
102 | ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); | ||
103 | |||
104 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
105 | if (ctxt) { | ||
106 | at_least_one = 1; | ||
107 | ctxt->next = xprt->sc_ctxt_head; | ||
108 | xprt->sc_ctxt_head = ctxt; | ||
109 | } else { | ||
110 | /* kmalloc failed...give up for now */ | ||
111 | xprt->sc_ctxt_cnt--; | ||
112 | break; | ||
113 | } | ||
114 | } | ||
115 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
116 | dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n", | ||
117 | xprt->sc_ctxt_max, xprt->sc_ctxt_cnt); | ||
118 | return at_least_one; | ||
119 | } | ||
120 | |||
121 | struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | ||
122 | { | ||
123 | struct svc_rdma_op_ctxt *ctxt; | ||
124 | |||
125 | while (1) { | ||
126 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
127 | if (unlikely(xprt->sc_ctxt_head == NULL)) { | ||
128 | /* Try to bump my cache. */ | ||
129 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
130 | |||
131 | if (rdma_bump_context_cache(xprt)) | ||
132 | continue; | ||
133 | |||
134 | printk(KERN_INFO "svcrdma: sleeping waiting for " | ||
135 | "context memory on xprt=%p\n", | ||
136 | xprt); | ||
137 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | ||
138 | continue; | ||
139 | } | ||
140 | ctxt = xprt->sc_ctxt_head; | ||
141 | xprt->sc_ctxt_head = ctxt->next; | ||
142 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
143 | ctxt->xprt = xprt; | ||
144 | INIT_LIST_HEAD(&ctxt->dto_q); | ||
145 | ctxt->count = 0; | ||
146 | break; | ||
147 | } | ||
148 | return ctxt; | ||
149 | } | ||
150 | |||
151 | void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) | ||
152 | { | ||
153 | struct svcxprt_rdma *xprt; | ||
154 | int i; | ||
155 | |||
156 | BUG_ON(!ctxt); | ||
157 | xprt = ctxt->xprt; | ||
158 | if (free_pages) | ||
159 | for (i = 0; i < ctxt->count; i++) | ||
160 | put_page(ctxt->pages[i]); | ||
161 | |||
162 | for (i = 0; i < ctxt->count; i++) | ||
163 | dma_unmap_single(xprt->sc_cm_id->device->dma_device, | ||
164 | ctxt->sge[i].addr, | ||
165 | ctxt->sge[i].length, | ||
166 | ctxt->direction); | ||
167 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
168 | ctxt->next = xprt->sc_ctxt_head; | ||
169 | xprt->sc_ctxt_head = ctxt; | ||
170 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
171 | } | ||
172 | |||
173 | /* ib_cq event handler */ | ||
174 | static void cq_event_handler(struct ib_event *event, void *context) | ||
175 | { | ||
176 | struct svc_xprt *xprt = context; | ||
177 | dprintk("svcrdma: received CQ event id=%d, context=%p\n", | ||
178 | event->event, context); | ||
179 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
180 | } | ||
181 | |||
182 | /* QP event handler */ | ||
183 | static void qp_event_handler(struct ib_event *event, void *context) | ||
184 | { | ||
185 | struct svc_xprt *xprt = context; | ||
186 | |||
187 | switch (event->event) { | ||
188 | /* These are considered benign events */ | ||
189 | case IB_EVENT_PATH_MIG: | ||
190 | case IB_EVENT_COMM_EST: | ||
191 | case IB_EVENT_SQ_DRAINED: | ||
192 | case IB_EVENT_QP_LAST_WQE_REACHED: | ||
193 | dprintk("svcrdma: QP event %d received for QP=%p\n", | ||
194 | event->event, event->element.qp); | ||
195 | break; | ||
196 | /* These are considered fatal events */ | ||
197 | case IB_EVENT_PATH_MIG_ERR: | ||
198 | case IB_EVENT_QP_FATAL: | ||
199 | case IB_EVENT_QP_REQ_ERR: | ||
200 | case IB_EVENT_QP_ACCESS_ERR: | ||
201 | case IB_EVENT_DEVICE_FATAL: | ||
202 | default: | ||
203 | dprintk("svcrdma: QP ERROR event %d received for QP=%p, " | ||
204 | "closing transport\n", | ||
205 | event->event, event->element.qp); | ||
206 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
207 | break; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * Data Transfer Operation Tasklet | ||
213 | * | ||
214 | * Walks a list of transports with I/O pending, removing entries as | ||
215 | * they are added to the server's I/O pending list. Two bits indicate | ||
216 | * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave | ||
217 | * spinlock that serializes access to the transport list with the RQ | ||
218 | * and SQ interrupt handlers. | ||
219 | */ | ||
220 | static void dto_tasklet_func(unsigned long data) | ||
221 | { | ||
222 | struct svcxprt_rdma *xprt; | ||
223 | unsigned long flags; | ||
224 | |||
225 | spin_lock_irqsave(&dto_lock, flags); | ||
226 | while (!list_empty(&dto_xprt_q)) { | ||
227 | xprt = list_entry(dto_xprt_q.next, | ||
228 | struct svcxprt_rdma, sc_dto_q); | ||
229 | list_del_init(&xprt->sc_dto_q); | ||
230 | spin_unlock_irqrestore(&dto_lock, flags); | ||
231 | |||
232 | if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { | ||
233 | ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); | ||
234 | rq_cq_reap(xprt); | ||
235 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
236 | /* | ||
237 | * If data arrived before established event, | ||
238 | * don't enqueue. This defers RPC I/O until the | ||
239 | * RDMA connection is complete. | ||
240 | */ | ||
241 | if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) | ||
242 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
243 | } | ||
244 | |||
245 | if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) { | ||
246 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); | ||
247 | sq_cq_reap(xprt); | ||
248 | } | ||
249 | |||
250 | spin_lock_irqsave(&dto_lock, flags); | ||
251 | } | ||
252 | spin_unlock_irqrestore(&dto_lock, flags); | ||
253 | } | ||
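
The RQ and SQ completion handlers that follow defer all real work to this tasklet: from interrupt context they only set a pending bit, add the transport to the global dto_xprt_q if it is not already queued, and schedule the tasklet, which then drains the list. A userspace sketch of that deferral pattern, using a pthread mutex and plain ints in place of the tasklet machinery and atomic bitops:

#include <pthread.h>
#include <stdio.h>

struct xprt {
	int rq_pending;			/* models RDMAXPRT_RQ_PENDING */
	int queued;			/* already on the work list? */
	struct xprt *next;
};

static pthread_mutex_t dto_lock = PTHREAD_MUTEX_INITIALIZER;
static struct xprt *dto_list;

static void comp_handler(struct xprt *x)
{
	pthread_mutex_lock(&dto_lock);
	x->rq_pending = 1;		/* always record the pending work */
	if (!x->queued) {		/* enqueue only once */
		x->queued = 1;
		x->next = dto_list;
		dto_list = x;
	}
	pthread_mutex_unlock(&dto_lock);
	/* real code: tasklet_schedule(&dto_tasklet); */
}

static void worker(void)
{
	pthread_mutex_lock(&dto_lock);
	while (dto_list) {
		struct xprt *x = dto_list;

		dto_list = x->next;
		x->queued = 0;
		pthread_mutex_unlock(&dto_lock);
		if (x->rq_pending) {	/* real code: poll the RQ CQ here */
			x->rq_pending = 0;
			printf("reap RQ for %p\n", (void *)x);
		}
		pthread_mutex_lock(&dto_lock);
	}
	pthread_mutex_unlock(&dto_lock);
}

int main(void)
{
	struct xprt x = { 0, 0, NULL };

	comp_handler(&x);
	comp_handler(&x);		/* second event: no double-queue */
	worker();
	return 0;
}
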
254 | |||
255 | /* | ||
256 | * Receive Queue Completion Handler | ||
257 | * | ||
258 | * Since an RQ completion handler is called on interrupt context, we | ||
259 | * need to defer the handling of the I/O to a tasklet | ||
260 | */ | ||
261 | static void rq_comp_handler(struct ib_cq *cq, void *cq_context) | ||
262 | { | ||
263 | struct svcxprt_rdma *xprt = cq_context; | ||
264 | unsigned long flags; | ||
265 | |||
266 | /* | ||
267 | * Set the bit regardless of whether or not it's on the list | ||
268 | * because it may be on the list already due to an SQ | ||
269 | * completion. | ||
270 | */ | ||
271 | set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); | ||
272 | |||
273 | /* | ||
274 | * If this transport is not already on the DTO transport queue, | ||
275 | * add it | ||
276 | */ | ||
277 | spin_lock_irqsave(&dto_lock, flags); | ||
278 | if (list_empty(&xprt->sc_dto_q)) | ||
279 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); | ||
280 | spin_unlock_irqrestore(&dto_lock, flags); | ||
281 | |||
282 | /* Tasklet does all the work to avoid irqsave locks. */ | ||
283 | tasklet_schedule(&dto_tasklet); | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * rq_cq_reap - Process the RQ CQ. | ||
288 | * | ||
289 | * Take all completing WC off the CQE and enqueue the associated DTO | ||
290 | * context on the dto_q for the transport. | ||
291 | */ | ||
292 | static void rq_cq_reap(struct svcxprt_rdma *xprt) | ||
293 | { | ||
294 | int ret; | ||
295 | struct ib_wc wc; | ||
296 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
297 | |||
298 | atomic_inc(&rdma_stat_rq_poll); | ||
299 | |||
300 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
301 | while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { | ||
302 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
303 | ctxt->wc_status = wc.status; | ||
304 | ctxt->byte_len = wc.byte_len; | ||
305 | if (wc.status != IB_WC_SUCCESS) { | ||
306 | /* Close the transport */ | ||
307 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
308 | svc_rdma_put_context(ctxt, 1); | ||
309 | continue; | ||
310 | } | ||
311 | list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); | ||
312 | } | ||
313 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
314 | |||
315 | if (ctxt) | ||
316 | atomic_inc(&rdma_stat_rq_prod); | ||
317 | } | ||
318 | |||
319 | /* | ||
320 | * Send Queue Completion Handler - potentially called on interrupt context. | ||
321 | */ | ||
322 | static void sq_cq_reap(struct svcxprt_rdma *xprt) | ||
323 | { | ||
324 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
325 | struct ib_wc wc; | ||
326 | struct ib_cq *cq = xprt->sc_sq_cq; | ||
327 | int ret; | ||
328 | |||
329 | atomic_inc(&rdma_stat_sq_poll); | ||
330 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { | ||
331 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
332 | xprt = ctxt->xprt; | ||
333 | |||
334 | if (wc.status != IB_WC_SUCCESS) | ||
335 | /* Close the transport */ | ||
336 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
337 | |||
338 | /* Decrement used SQ WR count */ | ||
339 | atomic_dec(&xprt->sc_sq_count); | ||
340 | wake_up(&xprt->sc_send_wait); | ||
341 | |||
342 | switch (ctxt->wr_op) { | ||
343 | case IB_WR_SEND: | ||
344 | case IB_WR_RDMA_WRITE: | ||
345 | svc_rdma_put_context(ctxt, 1); | ||
346 | break; | ||
347 | |||
348 | case IB_WR_RDMA_READ: | ||
349 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
350 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
351 | set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); | ||
352 | spin_lock_bh(&xprt->sc_read_complete_lock); | ||
353 | list_add_tail(&ctxt->dto_q, | ||
354 | &xprt->sc_read_complete_q); | ||
355 | spin_unlock_bh(&xprt->sc_read_complete_lock); | ||
356 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
357 | } | ||
358 | break; | ||
359 | |||
360 | default: | ||
361 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
362 | "opcode=%d, status=%d\n", | ||
363 | wc.opcode, wc.status); | ||
364 | break; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | if (ctxt) | ||
369 | atomic_inc(&rdma_stat_sq_prod); | ||
370 | } | ||
371 | |||
372 | static void sq_comp_handler(struct ib_cq *cq, void *cq_context) | ||
373 | { | ||
374 | struct svcxprt_rdma *xprt = cq_context; | ||
375 | unsigned long flags; | ||
376 | |||
377 | /* | ||
378 | * Set the bit regardless of whether or not it's on the list | ||
379 | * because it may be on the list already due to an RQ | ||
380 | * completion. | ||
381 | */ | ||
382 | set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags); | ||
383 | |||
384 | /* | ||
385 | * If this transport is not already on the DTO transport queue, | ||
386 | * add it | ||
387 | */ | ||
388 | spin_lock_irqsave(&dto_lock, flags); | ||
389 | if (list_empty(&xprt->sc_dto_q)) | ||
390 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); | ||
391 | spin_unlock_irqrestore(&dto_lock, flags); | ||
392 | |||
393 | /* Tasklet does all the work to avoid irqsave locks. */ | ||
394 | tasklet_schedule(&dto_tasklet); | ||
395 | } | ||
396 | |||
397 | static void create_context_cache(struct svcxprt_rdma *xprt, | ||
398 | int ctxt_count, int ctxt_bump, int ctxt_max) | ||
399 | { | ||
400 | struct svc_rdma_op_ctxt *ctxt; | ||
401 | int i; | ||
402 | |||
403 | xprt->sc_ctxt_max = ctxt_max; | ||
404 | xprt->sc_ctxt_bump = ctxt_bump; | ||
405 | xprt->sc_ctxt_cnt = 0; | ||
406 | xprt->sc_ctxt_head = NULL; | ||
407 | for (i = 0; i < ctxt_count; i++) { | ||
408 | ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); | ||
409 | if (ctxt) { | ||
410 | ctxt->next = xprt->sc_ctxt_head; | ||
411 | xprt->sc_ctxt_head = ctxt; | ||
412 | xprt->sc_ctxt_cnt++; | ||
413 | } | ||
414 | } | ||
415 | } | ||
416 | |||
417 | static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt) | ||
418 | { | ||
419 | struct svc_rdma_op_ctxt *next; | ||
420 | if (!ctxt) | ||
421 | return; | ||
422 | |||
423 | do { | ||
424 | next = ctxt->next; | ||
425 | kfree(ctxt); | ||
426 | ctxt = next; | ||
427 | } while (next); | ||
428 | } | ||
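
The context cache implemented above is a singly linked free list: svc_rdma_get_context() pops the head under sc_ctxt_lock and svc_rdma_put_context() pushes the context back. A stripped-down userspace sketch of that free list, omitting the bump/sleep paths and DMA unmapping; names are illustrative:

#include <pthread.h>
#include <stdlib.h>

struct op_ctxt {
	struct op_ctxt *next;
	/* ... per-operation state ... */
};

static pthread_mutex_t ctxt_lock = PTHREAD_MUTEX_INITIALIZER;
static struct op_ctxt *ctxt_head;

static struct op_ctxt *get_context(void)
{
	struct op_ctxt *c;

	pthread_mutex_lock(&ctxt_lock);
	c = ctxt_head;
	if (c)
		ctxt_head = c->next;	/* pop from the free list */
	pthread_mutex_unlock(&ctxt_lock);
	return c;
}

static void put_context(struct op_ctxt *c)
{
	pthread_mutex_lock(&ctxt_lock);
	c->next = ctxt_head;		/* push back onto the free list */
	ctxt_head = c;
	pthread_mutex_unlock(&ctxt_lock);
}

int main(void)
{
	struct op_ctxt *c = calloc(1, sizeof(*c));

	put_context(c);			/* seed the cache */
	c = get_context();
	free(c);
	return 0;
}
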
429 | |||
430 | static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, | ||
431 | int listener) | ||
432 | { | ||
433 | struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL); | ||
434 | |||
435 | if (!cma_xprt) | ||
436 | return NULL; | ||
437 | svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv); | ||
438 | INIT_LIST_HEAD(&cma_xprt->sc_accept_q); | ||
439 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); | ||
440 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); | ||
441 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); | ||
442 | init_waitqueue_head(&cma_xprt->sc_send_wait); | ||
443 | |||
444 | spin_lock_init(&cma_xprt->sc_lock); | ||
445 | spin_lock_init(&cma_xprt->sc_read_complete_lock); | ||
446 | spin_lock_init(&cma_xprt->sc_ctxt_lock); | ||
447 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); | ||
448 | |||
449 | cma_xprt->sc_ord = svcrdma_ord; | ||
450 | |||
451 | cma_xprt->sc_max_req_size = svcrdma_max_req_size; | ||
452 | cma_xprt->sc_max_requests = svcrdma_max_requests; | ||
453 | cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT; | ||
454 | atomic_set(&cma_xprt->sc_sq_count, 0); | ||
455 | |||
456 | if (!listener) { | ||
457 | int reqs = cma_xprt->sc_max_requests; | ||
458 | create_context_cache(cma_xprt, | ||
459 | reqs << 1, /* starting size */ | ||
460 | reqs, /* bump amount */ | ||
461 | reqs + | ||
462 | cma_xprt->sc_sq_depth + | ||
463 | RPCRDMA_MAX_THREADS + 1); /* max */ | ||
464 | if (!cma_xprt->sc_ctxt_head) { | ||
465 | kfree(cma_xprt); | ||
466 | return NULL; | ||
467 | } | ||
468 | clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); | ||
469 | } else | ||
470 | set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); | ||
471 | |||
472 | return cma_xprt; | ||
473 | } | ||
474 | |||
475 | struct page *svc_rdma_get_page(void) | ||
476 | { | ||
477 | struct page *page; | ||
478 | |||
479 | while ((page = alloc_page(GFP_KERNEL)) == NULL) { | ||
480 | /* If we can't get memory, wait a bit and try again */ | ||
481 | printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 " | ||
482 | "jiffies.\n"); | ||
483 | schedule_timeout_uninterruptible(msecs_to_jiffies(1000)); | ||
484 | } | ||
485 | return page; | ||
486 | } | ||
487 | |||
488 | int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | ||
489 | { | ||
490 | struct ib_recv_wr recv_wr, *bad_recv_wr; | ||
491 | struct svc_rdma_op_ctxt *ctxt; | ||
492 | struct page *page; | ||
493 | unsigned long pa; | ||
494 | int sge_no; | ||
495 | int buflen; | ||
496 | int ret; | ||
497 | |||
498 | ctxt = svc_rdma_get_context(xprt); | ||
499 | buflen = 0; | ||
500 | ctxt->direction = DMA_FROM_DEVICE; | ||
501 | for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { | ||
502 | BUG_ON(sge_no >= xprt->sc_max_sge); | ||
503 | page = svc_rdma_get_page(); | ||
504 | ctxt->pages[sge_no] = page; | ||
505 | pa = ib_dma_map_page(xprt->sc_cm_id->device, | ||
506 | page, 0, PAGE_SIZE, | ||
507 | DMA_FROM_DEVICE); | ||
508 | ctxt->sge[sge_no].addr = pa; | ||
509 | ctxt->sge[sge_no].length = PAGE_SIZE; | ||
510 | ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
511 | buflen += PAGE_SIZE; | ||
512 | } | ||
513 | ctxt->count = sge_no; | ||
514 | recv_wr.next = NULL; | ||
515 | recv_wr.sg_list = &ctxt->sge[0]; | ||
516 | recv_wr.num_sge = ctxt->count; | ||
517 | recv_wr.wr_id = (u64)(unsigned long)ctxt; | ||
518 | |||
519 | ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); | ||
520 | return ret; | ||
521 | } | ||
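
svc_rdma_post_recv() sizes each receive WR by adding whole-page SGEs until sc_max_req_size bytes are covered, i.e. ceil(max_req_size / PAGE_SIZE) SGEs. A short userspace sketch of that sizing loop, with illustrative values:

#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	unsigned int max_req_size = 16384 + 100;
	unsigned int buflen = 0, sge_no = 0;

	while (buflen < max_req_size) {
		sge_no++;		/* one PAGE_SIZE SGE per page */
		buflen += PAGE_SIZE;
	}
	printf("posted %u SGEs (%u bytes)\n", sge_no, buflen);	/* 5 SGEs */
	return 0;
}
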
522 | |||
523 | /* | ||
524 | * This function handles the CONNECT_REQUEST event on a listening | ||
525 | * endpoint. It is passed the cma_id for the _new_ connection. The context in | ||
526 | * this cma_id is inherited from the listening cma_id and is the svc_xprt | ||
527 | * structure for the listening endpoint. | ||
528 | * | ||
529 | * This function creates a new xprt for the new connection and enqueues it on | ||
530 | * the accept queue for the listening xprt. When the listen thread is kicked, it | ||
531 | * will call the recvfrom method on the listen xprt which will accept the new | ||
532 | * connection. | ||
533 | */ | ||
534 | static void handle_connect_req(struct rdma_cm_id *new_cma_id) | ||
535 | { | ||
536 | struct svcxprt_rdma *listen_xprt = new_cma_id->context; | ||
537 | struct svcxprt_rdma *newxprt; | ||
538 | |||
539 | /* Create a new transport */ | ||
540 | newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); | ||
541 | if (!newxprt) { | ||
542 | dprintk("svcrdma: failed to create new transport\n"); | ||
543 | return; | ||
544 | } | ||
545 | newxprt->sc_cm_id = new_cma_id; | ||
546 | new_cma_id->context = newxprt; | ||
547 | dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", | ||
548 | newxprt, newxprt->sc_cm_id, listen_xprt); | ||
549 | |||
550 | /* | ||
551 | * Enqueue the new transport on the accept queue of the listening | ||
552 | * transport | ||
553 | */ | ||
554 | spin_lock_bh(&listen_xprt->sc_lock); | ||
555 | list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q); | ||
556 | spin_unlock_bh(&listen_xprt->sc_lock); | ||
557 | |||
558 | /* | ||
559 | * Can't use svc_xprt_received here because we are not on a | ||
560 | * rqstp thread | ||
561 | */ | ||
562 | set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags); | ||
563 | svc_xprt_enqueue(&listen_xprt->sc_xprt); | ||
564 | } | ||
565 | |||
566 | /* | ||
567 | * Handles events generated on the listening endpoint. These events will be | ||
568 | * either incoming connect requests or adapter removal events. | ||
569 | */ | ||
570 | static int rdma_listen_handler(struct rdma_cm_id *cma_id, | ||
571 | struct rdma_cm_event *event) | ||
572 | { | ||
573 | struct svcxprt_rdma *xprt = cma_id->context; | ||
574 | int ret = 0; | ||
575 | |||
576 | switch (event->event) { | ||
577 | case RDMA_CM_EVENT_CONNECT_REQUEST: | ||
578 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " | ||
579 | "event=%d\n", cma_id, cma_id->context, event->event); | ||
580 | handle_connect_req(cma_id); | ||
581 | break; | ||
582 | |||
583 | case RDMA_CM_EVENT_ESTABLISHED: | ||
584 | /* Accept complete */ | ||
585 | dprintk("svcrdma: Connection completed on LISTEN xprt=%p, " | ||
586 | "cm_id=%p\n", xprt, cma_id); | ||
587 | break; | ||
588 | |||
589 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | ||
590 | dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", | ||
591 | xprt, cma_id); | ||
592 | if (xprt) | ||
593 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
594 | break; | ||
595 | |||
596 | default: | ||
597 | dprintk("svcrdma: Unexpected event on listening endpoint %p, " | ||
598 | "event=%d\n", cma_id, event->event); | ||
599 | break; | ||
600 | } | ||
601 | |||
602 | return ret; | ||
603 | } | ||
604 | |||
605 | static int rdma_cma_handler(struct rdma_cm_id *cma_id, | ||
606 | struct rdma_cm_event *event) | ||
607 | { | ||
608 | struct svc_xprt *xprt = cma_id->context; | ||
609 | struct svcxprt_rdma *rdma = | ||
610 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
611 | switch (event->event) { | ||
612 | case RDMA_CM_EVENT_ESTABLISHED: | ||
613 | /* Accept complete */ | ||
614 | dprintk("svcrdma: Connection completed on DTO xprt=%p, " | ||
615 | "cm_id=%p\n", xprt, cma_id); | ||
616 | clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); | ||
617 | svc_xprt_enqueue(xprt); | ||
618 | break; | ||
619 | case RDMA_CM_EVENT_DISCONNECTED: | ||
620 | dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n", | ||
621 | xprt, cma_id); | ||
622 | if (xprt) { | ||
623 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
624 | svc_xprt_enqueue(xprt); | ||
625 | } | ||
626 | break; | ||
627 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | ||
628 | dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, " | ||
629 | "event=%d\n", cma_id, xprt, event->event); | ||
630 | if (xprt) { | ||
631 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
632 | svc_xprt_enqueue(xprt); | ||
633 | } | ||
634 | break; | ||
635 | default: | ||
636 | dprintk("svcrdma: Unexpected event on DTO endpoint %p, " | ||
637 | "event=%d\n", cma_id, event->event); | ||
638 | break; | ||
639 | } | ||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | /* | ||
644 | * Create a listening RDMA service endpoint. | ||
645 | */ | ||
646 | static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | ||
647 | struct sockaddr *sa, int salen, | ||
648 | int flags) | ||
649 | { | ||
650 | struct rdma_cm_id *listen_id; | ||
651 | struct svcxprt_rdma *cma_xprt; | ||
652 | struct svc_xprt *xprt; | ||
653 | int ret; | ||
654 | |||
655 | dprintk("svcrdma: Creating RDMA socket\n"); | ||
656 | |||
657 | cma_xprt = rdma_create_xprt(serv, 1); | ||
658 | if (!cma_xprt) | ||
659 | return ERR_PTR(ENOMEM); | ||
660 | xprt = &cma_xprt->sc_xprt; | ||
661 | |||
662 | listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); | ||
663 | if (IS_ERR(listen_id)) { | ||
664 | rdma_destroy_xprt(cma_xprt); | ||
665 | dprintk("svcrdma: rdma_create_id failed = %ld\n", | ||
666 | PTR_ERR(listen_id)); | ||
667 | return (void *)listen_id; | ||
668 | } | ||
669 | ret = rdma_bind_addr(listen_id, sa); | ||
670 | if (ret) { | ||
671 | rdma_destroy_xprt(cma_xprt); | ||
672 | rdma_destroy_id(listen_id); | ||
673 | dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); | ||
674 | return ERR_PTR(ret); | ||
675 | } | ||
676 | cma_xprt->sc_cm_id = listen_id; | ||
677 | |||
678 | ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); | ||
679 | if (ret) { | ||
680 | rdma_destroy_id(listen_id); | ||
681 | rdma_destroy_xprt(cma_xprt); | ||
682 | dprintk("svcrdma: rdma_listen failed = %d\n", ret); | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * We need to use the address from the cm_id in case the | ||
687 | * caller specified 0 for the port number. | ||
688 | */ | ||
689 | sa = (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr; | ||
690 | svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen); | ||
691 | |||
692 | return &cma_xprt->sc_xprt; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * This is the xpo_accept function for listening endpoints. Its | ||
697 | * purpose is to accept incoming connections. The CMA callback handler | ||
698 | * has already created a new transport and attached it to the new CMA | ||
699 | * ID. | ||
700 | * | ||
701 | * There is a queue of pending connections hung on the listening | ||
702 | * transport. This queue contains the new svc_xprt structure. This | ||
703 | * function takes svc_xprt structures off the accept_q and completes | ||
704 | * the connection. | ||
705 | */ | ||
706 | static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | ||
707 | { | ||
708 | struct svcxprt_rdma *listen_rdma; | ||
709 | struct svcxprt_rdma *newxprt = NULL; | ||
710 | struct rdma_conn_param conn_param; | ||
711 | struct ib_qp_init_attr qp_attr; | ||
712 | struct ib_device_attr devattr; | ||
713 | struct sockaddr *sa; | ||
714 | int ret; | ||
715 | int i; | ||
716 | |||
717 | listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
718 | clear_bit(XPT_CONN, &xprt->xpt_flags); | ||
719 | /* Get the next entry off the accept list */ | ||
720 | spin_lock_bh(&listen_rdma->sc_lock); | ||
721 | if (!list_empty(&listen_rdma->sc_accept_q)) { | ||
722 | newxprt = list_entry(listen_rdma->sc_accept_q.next, | ||
723 | struct svcxprt_rdma, sc_accept_q); | ||
724 | list_del_init(&newxprt->sc_accept_q); | ||
725 | } | ||
726 | if (!list_empty(&listen_rdma->sc_accept_q)) | ||
727 | set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags); | ||
728 | spin_unlock_bh(&listen_rdma->sc_lock); | ||
729 | if (!newxprt) | ||
730 | return NULL; | ||
731 | |||
732 | dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", | ||
733 | newxprt, newxprt->sc_cm_id); | ||
734 | |||
735 | ret = ib_query_device(newxprt->sc_cm_id->device, &devattr); | ||
736 | if (ret) { | ||
737 | dprintk("svcrdma: could not query device attributes on " | ||
738 | "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret); | ||
739 | goto errout; | ||
740 | } | ||
741 | |||
742 | /* Qualify the transport resource defaults with the | ||
743 | * capabilities of this particular device */ | ||
744 | newxprt->sc_max_sge = min((size_t)devattr.max_sge, | ||
745 | (size_t)RPCSVC_MAXPAGES); | ||
746 | newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr, | ||
747 | (size_t)svcrdma_max_requests); | ||
748 | newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; | ||
749 | |||
750 | newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom, | ||
751 | (size_t)svcrdma_ord); | ||
752 | |||
753 | newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); | ||
754 | if (IS_ERR(newxprt->sc_pd)) { | ||
755 | dprintk("svcrdma: error creating PD for connect request\n"); | ||
756 | goto errout; | ||
757 | } | ||
758 | newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device, | ||
759 | sq_comp_handler, | ||
760 | cq_event_handler, | ||
761 | newxprt, | ||
762 | newxprt->sc_sq_depth, | ||
763 | 0); | ||
764 | if (IS_ERR(newxprt->sc_sq_cq)) { | ||
765 | dprintk("svcrdma: error creating SQ CQ for connect request\n"); | ||
766 | goto errout; | ||
767 | } | ||
768 | newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device, | ||
769 | rq_comp_handler, | ||
770 | cq_event_handler, | ||
771 | newxprt, | ||
772 | newxprt->sc_max_requests, | ||
773 | 0); | ||
774 | if (IS_ERR(newxprt->sc_rq_cq)) { | ||
775 | dprintk("svcrdma: error creating RQ CQ for connect request\n"); | ||
776 | goto errout; | ||
777 | } | ||
778 | |||
779 | memset(&qp_attr, 0, sizeof qp_attr); | ||
780 | qp_attr.event_handler = qp_event_handler; | ||
781 | qp_attr.qp_context = &newxprt->sc_xprt; | ||
782 | qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; | ||
783 | qp_attr.cap.max_recv_wr = newxprt->sc_max_requests; | ||
784 | qp_attr.cap.max_send_sge = newxprt->sc_max_sge; | ||
785 | qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; | ||
786 | qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; | ||
787 | qp_attr.qp_type = IB_QPT_RC; | ||
788 | qp_attr.send_cq = newxprt->sc_sq_cq; | ||
789 | qp_attr.recv_cq = newxprt->sc_rq_cq; | ||
790 | dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n" | ||
791 | " cm_id->device=%p, sc_pd->device=%p\n" | ||
792 | " cap.max_send_wr = %d\n" | ||
793 | " cap.max_recv_wr = %d\n" | ||
794 | " cap.max_send_sge = %d\n" | ||
795 | " cap.max_recv_sge = %d\n", | ||
796 | newxprt->sc_cm_id, newxprt->sc_pd, | ||
797 | newxprt->sc_cm_id->device, newxprt->sc_pd->device, | ||
798 | qp_attr.cap.max_send_wr, | ||
799 | qp_attr.cap.max_recv_wr, | ||
800 | qp_attr.cap.max_send_sge, | ||
801 | qp_attr.cap.max_recv_sge); | ||
802 | |||
803 | ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); | ||
804 | if (ret) { | ||
805 | /* | ||
806 | * XXX: This is a hack. We need a xx_request_qp interface | ||
807 | * that will adjust the qp_attr's with a best-effort | ||
808 | * number | ||
809 | */ | ||
810 | qp_attr.cap.max_send_sge -= 2; | ||
811 | qp_attr.cap.max_recv_sge -= 2; | ||
812 | ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, | ||
813 | &qp_attr); | ||
814 | if (ret) { | ||
815 | dprintk("svcrdma: failed to create QP, ret=%d\n", ret); | ||
816 | goto errout; | ||
817 | } | ||
818 | newxprt->sc_max_sge = min(qp_attr.cap.max_send_sge, | ||
819 | qp_attr.cap.max_recv_sge); | ||
820 | newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; | ||
821 | newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; | ||
822 | } | ||
823 | newxprt->sc_qp = newxprt->sc_cm_id->qp; | ||
824 | |||
825 | /* Register all of physical memory */ | ||
826 | newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, | ||
827 | IB_ACCESS_LOCAL_WRITE | | ||
828 | IB_ACCESS_REMOTE_WRITE); | ||
829 | if (IS_ERR(newxprt->sc_phys_mr)) { | ||
830 | dprintk("svcrdma: failed to create DMA MR for connect request\n"); | ||
831 | goto errout; | ||
832 | } | ||
833 | |||
834 | /* Post receive buffers */ | ||
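| /* One receive is posted per sc_max_requests; each corresponds to | ||
| * an RQ credit that is returned in svc_rdma_release_rqst() below. | ||
| */ | ||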
835 | for (i = 0; i < newxprt->sc_max_requests; i++) { | ||
836 | ret = svc_rdma_post_recv(newxprt); | ||
837 | if (ret) { | ||
838 | dprintk("svcrdma: failure posting receive buffers\n"); | ||
839 | goto errout; | ||
840 | } | ||
841 | } | ||
842 | |||
843 | /* Swap out the handler */ | ||
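| /* From this point CM events for this cm_id (ESTABLISHED, | ||
| * DISCONNECT, ...) go to the connected-transport handler rather | ||
| * than the listener's handler. | ||
| */ | ||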
844 | newxprt->sc_cm_id->event_handler = rdma_cma_handler; | ||
845 | |||
846 | /* Accept Connection */ | ||
847 | set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); | ||
848 | memset(&conn_param, 0, sizeof conn_param); | ||
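| /* responder_resources is 0 because the client is not expected to | ||
| * issue RDMA_READs against the server's memory; initiator_depth | ||
| * limits the RDMA_READs the server itself may have in flight. | ||
| */ | ||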
849 | conn_param.responder_resources = 0; | ||
850 | conn_param.initiator_depth = newxprt->sc_ord; | ||
851 | ret = rdma_accept(newxprt->sc_cm_id, &conn_param); | ||
852 | if (ret) { | ||
853 | dprintk("svcrdma: failed to accept new connection, ret=%d\n", | ||
854 | ret); | ||
855 | goto errout; | ||
856 | } | ||
857 | |||
858 | dprintk("svcrdma: new connection %p accepted with the following " | ||
859 | "attributes:\n" | ||
860 | " local_ip : %d.%d.%d.%d\n" | ||
861 | " local_port : %d\n" | ||
862 | " remote_ip : %d.%d.%d.%d\n" | ||
863 | " remote_port : %d\n" | ||
864 | " max_sge : %d\n" | ||
865 | " sq_depth : %d\n" | ||
866 | " max_requests : %d\n" | ||
867 | " ord : %d\n", | ||
868 | newxprt, | ||
869 | NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
870 | route.addr.src_addr)->sin_addr.s_addr), | ||
871 | ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
872 | route.addr.src_addr)->sin_port), | ||
873 | NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
874 | route.addr.dst_addr)->sin_addr.s_addr), | ||
875 | ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
876 | route.addr.dst_addr)->sin_port), | ||
877 | newxprt->sc_max_sge, | ||
878 | newxprt->sc_sq_depth, | ||
879 | newxprt->sc_max_requests, | ||
880 | newxprt->sc_ord); | ||
881 | |||
882 | /* Set the local and remote addresses in the transport */ | ||
883 | sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; | ||
884 | svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); | ||
885 | sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; | ||
886 | svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); | ||
887 | |||
888 | ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); | ||
889 | ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP); | ||
890 | return &newxprt->sc_xprt; | ||
891 | |||
892 | errout: | ||
893 | dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); | ||
894 | rdma_destroy_xprt(newxprt); | ||
895 | rdma_destroy_id(newxprt->sc_cm_id); | ||
896 | return NULL; | ||
897 | } | ||
898 | |||
899 | /* | ||
900 | * Post an RQ WQE to the RQ when the rqst is being released. This | ||
901 | * effectively returns an RQ credit to the client. The rq_xprt_ctxt | ||
902 | * will be NULL if the request was deferred due to an RDMA_READ or if | ||
903 | * the transport had no data ready (EAGAIN). Note that an RPC deferred | ||
904 | * in svc_process still returns the credit; this is because the data | ||
905 | * has been copied and no longer consumes a WQE/WC. | ||
906 | */ | ||
907 | static void svc_rdma_release_rqst(struct svc_rqst *rqstp) | ||
908 | { | ||
909 | int err; | ||
910 | struct svcxprt_rdma *rdma = | ||
911 | container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt); | ||
912 | if (rqstp->rq_xprt_ctxt) { | ||
913 | BUG_ON(rqstp->rq_xprt_ctxt != rdma); | ||
914 | err = svc_rdma_post_recv(rdma); | ||
915 | if (err) | ||
916 | dprintk("svcrdma: failed to post an RQ WQE error=%d\n", | ||
917 | err); | ||
918 | } | ||
919 | rqstp->rq_xprt_ctxt = NULL; | ||
920 | } | ||
921 | |||
922 | /* Disable data ready events for this connection */ | ||
923 | static void svc_rdma_detach(struct svc_xprt *xprt) | ||
924 | { | ||
925 | struct svcxprt_rdma *rdma = | ||
926 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
927 | unsigned long flags; | ||
928 | |||
929 | dprintk("svc: svc_rdma_detach(%p)\n", xprt); | ||
930 | /* | ||
931 | * Shut down the connection. This will ensure we don't get any | ||
932 | * more events from the provider. | ||
933 | */ | ||
934 | rdma_disconnect(rdma->sc_cm_id); | ||
935 | rdma_destroy_id(rdma->sc_cm_id); | ||
936 | |||
937 | /* We may already be on the DTO list */ | ||
938 | spin_lock_irqsave(&dto_lock, flags); | ||
939 | if (!list_empty(&rdma->sc_dto_q)) | ||
940 | list_del_init(&rdma->sc_dto_q); | ||
941 | spin_unlock_irqrestore(&dto_lock, flags); | ||
942 | } | ||
943 | |||
944 | static void svc_rdma_free(struct svc_xprt *xprt) | ||
945 | { | ||
946 | struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; | ||
947 | dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); | ||
948 | rdma_destroy_xprt(rdma); | ||
949 | kfree(rdma); | ||
950 | } | ||
951 | |||
952 | static void rdma_destroy_xprt(struct svcxprt_rdma *xprt) | ||
953 | { | ||
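| /* Tear down in roughly the reverse order of construction; the | ||
| * NULL/IS_ERR checks let this run safely on a partially built | ||
| * transport from the errout path above. | ||
| */ | ||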
954 | if (xprt->sc_qp && !IS_ERR(xprt->sc_qp)) | ||
955 | ib_destroy_qp(xprt->sc_qp); | ||
956 | |||
957 | if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq)) | ||
958 | ib_destroy_cq(xprt->sc_sq_cq); | ||
959 | |||
960 | if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq)) | ||
961 | ib_destroy_cq(xprt->sc_rq_cq); | ||
962 | |||
963 | if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr)) | ||
964 | ib_dereg_mr(xprt->sc_phys_mr); | ||
965 | |||
966 | if (xprt->sc_pd && !IS_ERR(xprt->sc_pd)) | ||
967 | ib_dealloc_pd(xprt->sc_pd); | ||
968 | |||
969 | destroy_context_cache(xprt->sc_ctxt_head); | ||
970 | } | ||
971 | |||
972 | static int svc_rdma_has_wspace(struct svc_xprt *xprt) | ||
973 | { | ||
974 | struct svcxprt_rdma *rdma = | ||
975 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
976 | |||
977 | /* | ||
978 | * If there are fewer SQ WRs available than required to send a | ||
979 | * simple response, return false. | ||
980 | */ | ||
981 | if (rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3) | ||
982 | return 0; | ||
983 | |||
984 | /* | ||
985 | * ...or there are already waiters on the SQ, | ||
986 | * return false. | ||
987 | */ | ||
988 | if (waitqueue_active(&rdma->sc_send_wait)) | ||
989 | return 0; | ||
990 | |||
991 | /* Otherwise return true. */ | ||
992 | return 1; | ||
993 | } | ||
994 | |||
995 | int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | ||
996 | { | ||
997 | struct ib_send_wr *bad_wr; | ||
998 | int ret; | ||
999 | |||
1000 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) | ||
1001 | return 0; | ||
1002 | |||
1003 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); | ||
1004 | BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != | ||
1005 | wr->opcode); | ||
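| /* sc_sq_count tracks WRs currently outstanding on the SQ; it is | ||
| * incremented when a WR is posted and decremented as the SQ CQ is | ||
| * reaped, which also wakes sc_send_wait. | ||
| */ | ||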
1006 | /* If the SQ is full, wait until an SQ entry is available */ | ||
1007 | while (1) { | ||
1008 | spin_lock_bh(&xprt->sc_lock); | ||
1009 | if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { | ||
1010 | spin_unlock_bh(&xprt->sc_lock); | ||
1011 | atomic_inc(&rdma_stat_sq_starve); | ||
1012 | /* See if we can reap some SQ WR */ | ||
1013 | sq_cq_reap(xprt); | ||
1014 | |||
1015 | /* Wait until SQ WR available if SQ still full */ | ||
1016 | wait_event(xprt->sc_send_wait, | ||
1017 | atomic_read(&xprt->sc_sq_count) < | ||
1018 | xprt->sc_sq_depth); | ||
1019 | continue; | ||
1020 | } | ||
1021 | /* Post the WR and bump the used SQ WR count on success */ | ||
1022 | ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); | ||
1023 | if (!ret) | ||
1024 | atomic_inc(&xprt->sc_sq_count); | ||
1025 | else | ||
1026 | dprintk("svcrdma: failed to post SQ WR rc=%d, " | ||
1027 | "sc_sq_count=%d, sc_sq_depth=%d\n", | ||
1028 | ret, atomic_read(&xprt->sc_sq_count), | ||
1029 | xprt->sc_sq_depth); | ||
1030 | spin_unlock_bh(&xprt->sc_lock); | ||
1031 | break; | ||
1032 | } | ||
1033 | return ret; | ||
1034 | } | ||
1035 | |||
1036 | int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | ||
1037 | enum rpcrdma_errcode err) | ||
1038 | { | ||
1039 | struct ib_send_wr err_wr; | ||
1040 | struct ib_sge sge; | ||
1041 | struct page *p; | ||
1042 | struct svc_rdma_op_ctxt *ctxt; | ||
1043 | u32 *va; | ||
1044 | int length; | ||
1045 | int ret; | ||
1046 | |||
1047 | p = svc_rdma_get_page(); | ||
1048 | va = page_address(p); | ||
1049 | |||
1050 | /* XDR encode error */ | ||
1051 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); | ||
1052 | |||
1053 | /* Prepare SGE for local address */ | ||
1054 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, | ||
1055 | p, 0, PAGE_SIZE, DMA_TO_DEVICE); | ||
1056 | sge.lkey = xprt->sc_phys_mr->lkey; | ||
1057 | sge.length = length; | ||
1058 | |||
1059 | ctxt = svc_rdma_get_context(xprt); | ||
1060 | ctxt->count = 1; | ||
1061 | ctxt->pages[0] = p; | ||
1062 | |||
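| /* The context now owns the page; on a failed post below it is | ||
| * released together with the page via svc_rdma_put_context(ctxt, 1). | ||
| */ | ||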
1063 | /* Prepare SEND WR */ | ||
1064 | memset(&err_wr, 0, sizeof err_wr); | ||
1065 | ctxt->wr_op = IB_WR_SEND; | ||
1066 | err_wr.wr_id = (unsigned long)ctxt; | ||
1067 | err_wr.sg_list = &sge; | ||
1068 | err_wr.num_sge = 1; | ||
1069 | err_wr.opcode = IB_WR_SEND; | ||
1070 | err_wr.send_flags = IB_SEND_SIGNALED; | ||
1071 | |||
1072 | /* Post It */ | ||
1073 | ret = svc_rdma_send(xprt, &err_wr); | ||
1074 | if (ret) { | ||
1075 | dprintk("svcrdma: Error posting send = %d\n", ret); | ||
1076 | svc_rdma_put_context(ctxt, 1); | ||
1077 | } | ||
1078 | |||
1079 | return ret; | ||
1080 | } | ||