diff options
60 files changed, 5450 insertions, 1664 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 91082e60d289..6cae13718925 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -2247,7 +2247,7 @@ P: J. Bruce Fields | |||
2247 | M: bfields@fieldses.org | 2247 | M: bfields@fieldses.org |
2248 | P: Neil Brown | 2248 | P: Neil Brown |
2249 | M: neilb@suse.de | 2249 | M: neilb@suse.de |
2250 | L: nfs@lists.sourceforge.net | 2250 | L: linux-nfs@vger.kernel.org |
2251 | W: http://nfs.sourceforge.net/ | 2251 | W: http://nfs.sourceforge.net/ |
2252 | S: Supported | 2252 | S: Supported |
2253 | 2253 | ||
diff --git a/fs/Kconfig b/fs/Kconfig index 219ec06a8c7e..987b5d7cb21a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -1674,6 +1674,8 @@ config NFSD | |||
1674 | select CRYPTO_MD5 if NFSD_V4 | 1674 | select CRYPTO_MD5 if NFSD_V4 |
1675 | select CRYPTO if NFSD_V4 | 1675 | select CRYPTO if NFSD_V4 |
1676 | select FS_POSIX_ACL if NFSD_V4 | 1676 | select FS_POSIX_ACL if NFSD_V4 |
1677 | select PROC_FS if NFSD_V4 | ||
1678 | select PROC_FS if SUNRPC_GSS | ||
1677 | help | 1679 | help |
1678 | If you want your Linux box to act as an NFS *server*, so that other | 1680 | If you want your Linux box to act as an NFS *server*, so that other |
1679 | computers on your local network which support NFS can access certain | 1681 | computers on your local network which support NFS can access certain |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 572601e98dcd..ca6b16fc3101 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -34,10 +34,10 @@ static DEFINE_MUTEX(nlm_host_mutex); | |||
34 | 34 | ||
35 | static void nlm_gc_hosts(void); | 35 | static void nlm_gc_hosts(void); |
36 | static struct nsm_handle * __nsm_find(const struct sockaddr_in *, | 36 | static struct nsm_handle * __nsm_find(const struct sockaddr_in *, |
37 | const char *, int, int); | 37 | const char *, unsigned int, int); |
38 | static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, | 38 | static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, |
39 | const char *hostname, | 39 | const char *hostname, |
40 | int hostname_len); | 40 | unsigned int hostname_len); |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Common host lookup routine for server & client | 43 | * Common host lookup routine for server & client |
@@ -45,7 +45,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, | |||
45 | static struct nlm_host * | 45 | static struct nlm_host * |
46 | nlm_lookup_host(int server, const struct sockaddr_in *sin, | 46 | nlm_lookup_host(int server, const struct sockaddr_in *sin, |
47 | int proto, int version, const char *hostname, | 47 | int proto, int version, const char *hostname, |
48 | int hostname_len, const struct sockaddr_in *ssin) | 48 | unsigned int hostname_len, |
49 | const struct sockaddr_in *ssin) | ||
49 | { | 50 | { |
50 | struct hlist_head *chain; | 51 | struct hlist_head *chain; |
51 | struct hlist_node *pos; | 52 | struct hlist_node *pos; |
@@ -176,7 +177,7 @@ nlm_destroy_host(struct nlm_host *host) | |||
176 | */ | 177 | */ |
177 | struct nlm_host * | 178 | struct nlm_host * |
178 | nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, | 179 | nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, |
179 | const char *hostname, int hostname_len) | 180 | const char *hostname, unsigned int hostname_len) |
180 | { | 181 | { |
181 | struct sockaddr_in ssin = {0}; | 182 | struct sockaddr_in ssin = {0}; |
182 | 183 | ||
@@ -189,7 +190,7 @@ nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, | |||
189 | */ | 190 | */ |
190 | struct nlm_host * | 191 | struct nlm_host * |
191 | nlmsvc_lookup_host(struct svc_rqst *rqstp, | 192 | nlmsvc_lookup_host(struct svc_rqst *rqstp, |
192 | const char *hostname, int hostname_len) | 193 | const char *hostname, unsigned int hostname_len) |
193 | { | 194 | { |
194 | struct sockaddr_in ssin = {0}; | 195 | struct sockaddr_in ssin = {0}; |
195 | 196 | ||
@@ -307,7 +308,8 @@ void nlm_release_host(struct nlm_host *host) | |||
307 | * Release all resources held by that peer. | 308 | * Release all resources held by that peer. |
308 | */ | 309 | */ |
309 | void nlm_host_rebooted(const struct sockaddr_in *sin, | 310 | void nlm_host_rebooted(const struct sockaddr_in *sin, |
310 | const char *hostname, int hostname_len, | 311 | const char *hostname, |
312 | unsigned int hostname_len, | ||
311 | u32 new_state) | 313 | u32 new_state) |
312 | { | 314 | { |
313 | struct hlist_head *chain; | 315 | struct hlist_head *chain; |
@@ -377,8 +379,13 @@ nlm_shutdown_hosts(void) | |||
377 | /* First, make all hosts eligible for gc */ | 379 | /* First, make all hosts eligible for gc */ |
378 | dprintk("lockd: nuking all hosts...\n"); | 380 | dprintk("lockd: nuking all hosts...\n"); |
379 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { | 381 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { |
380 | hlist_for_each_entry(host, pos, chain, h_hash) | 382 | hlist_for_each_entry(host, pos, chain, h_hash) { |
381 | host->h_expires = jiffies - 1; | 383 | host->h_expires = jiffies - 1; |
384 | if (host->h_rpcclnt) { | ||
385 | rpc_shutdown_client(host->h_rpcclnt); | ||
386 | host->h_rpcclnt = NULL; | ||
387 | } | ||
388 | } | ||
382 | } | 389 | } |
383 | 390 | ||
384 | /* Then, perform a garbage collection pass */ | 391 | /* Then, perform a garbage collection pass */ |
@@ -449,7 +456,7 @@ static DEFINE_MUTEX(nsm_mutex); | |||
449 | 456 | ||
450 | static struct nsm_handle * | 457 | static struct nsm_handle * |
451 | __nsm_find(const struct sockaddr_in *sin, | 458 | __nsm_find(const struct sockaddr_in *sin, |
452 | const char *hostname, int hostname_len, | 459 | const char *hostname, unsigned int hostname_len, |
453 | int create) | 460 | int create) |
454 | { | 461 | { |
455 | struct nsm_handle *nsm = NULL; | 462 | struct nsm_handle *nsm = NULL; |
@@ -503,7 +510,8 @@ out: | |||
503 | } | 510 | } |
504 | 511 | ||
505 | static struct nsm_handle * | 512 | static struct nsm_handle * |
506 | nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len) | 513 | nsm_find(const struct sockaddr_in *sin, const char *hostname, |
514 | unsigned int hostname_len) | ||
507 | { | 515 | { |
508 | return __nsm_find(sin, hostname, hostname_len, 1); | 516 | return __nsm_find(sin, hostname, hostname_len, 1); |
509 | } | 517 | } |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 82e2192a0d5c..08226464e563 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -219,19 +219,6 @@ lockd(struct svc_rqst *rqstp) | |||
219 | module_put_and_exit(0); | 219 | module_put_and_exit(0); |
220 | } | 220 | } |
221 | 221 | ||
222 | |||
223 | static int find_socket(struct svc_serv *serv, int proto) | ||
224 | { | ||
225 | struct svc_sock *svsk; | ||
226 | int found = 0; | ||
227 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) | ||
228 | if (svsk->sk_sk->sk_protocol == proto) { | ||
229 | found = 1; | ||
230 | break; | ||
231 | } | ||
232 | return found; | ||
233 | } | ||
234 | |||
235 | /* | 222 | /* |
236 | * Make any sockets that are needed but not present. | 223 | * Make any sockets that are needed but not present. |
237 | * If nlm_udpport or nlm_tcpport were set as module | 224 | * If nlm_udpport or nlm_tcpport were set as module |
@@ -240,17 +227,25 @@ static int find_socket(struct svc_serv *serv, int proto) | |||
240 | static int make_socks(struct svc_serv *serv, int proto) | 227 | static int make_socks(struct svc_serv *serv, int proto) |
241 | { | 228 | { |
242 | static int warned; | 229 | static int warned; |
230 | struct svc_xprt *xprt; | ||
243 | int err = 0; | 231 | int err = 0; |
244 | 232 | ||
245 | if (proto == IPPROTO_UDP || nlm_udpport) | 233 | if (proto == IPPROTO_UDP || nlm_udpport) { |
246 | if (!find_socket(serv, IPPROTO_UDP)) | 234 | xprt = svc_find_xprt(serv, "udp", 0, 0); |
247 | err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport, | 235 | if (!xprt) |
248 | SVC_SOCK_DEFAULTS); | 236 | err = svc_create_xprt(serv, "udp", nlm_udpport, |
249 | if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) | 237 | SVC_SOCK_DEFAULTS); |
250 | if (!find_socket(serv, IPPROTO_TCP)) | 238 | else |
251 | err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport, | 239 | svc_xprt_put(xprt); |
252 | SVC_SOCK_DEFAULTS); | 240 | } |
253 | 241 | if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) { | |
242 | xprt = svc_find_xprt(serv, "tcp", 0, 0); | ||
243 | if (!xprt) | ||
244 | err = svc_create_xprt(serv, "tcp", nlm_tcpport, | ||
245 | SVC_SOCK_DEFAULTS); | ||
246 | else | ||
247 | svc_xprt_put(xprt); | ||
248 | } | ||
254 | if (err >= 0) { | 249 | if (err >= 0) { |
255 | warned = 0; | 250 | warned = 0; |
256 | err = 0; | 251 | err = 0; |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index bf27b6c6cb6b..385437e3387d 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
@@ -84,6 +84,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
84 | { | 84 | { |
85 | struct nlm_host *host; | 85 | struct nlm_host *host; |
86 | struct nlm_file *file; | 86 | struct nlm_file *file; |
87 | int rc = rpc_success; | ||
87 | 88 | ||
88 | dprintk("lockd: TEST4 called\n"); | 89 | dprintk("lockd: TEST4 called\n"); |
89 | resp->cookie = argp->cookie; | 90 | resp->cookie = argp->cookie; |
@@ -91,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
91 | /* Don't accept test requests during grace period */ | 92 | /* Don't accept test requests during grace period */ |
92 | if (nlmsvc_grace_period) { | 93 | if (nlmsvc_grace_period) { |
93 | resp->status = nlm_lck_denied_grace_period; | 94 | resp->status = nlm_lck_denied_grace_period; |
94 | return rpc_success; | 95 | return rc; |
95 | } | 96 | } |
96 | 97 | ||
97 | /* Obtain client and file */ | 98 | /* Obtain client and file */ |
@@ -101,12 +102,13 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
101 | /* Now check for conflicting locks */ | 102 | /* Now check for conflicting locks */ |
102 | resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie); | 103 | resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie); |
103 | if (resp->status == nlm_drop_reply) | 104 | if (resp->status == nlm_drop_reply) |
104 | return rpc_drop_reply; | 105 | rc = rpc_drop_reply; |
106 | else | ||
107 | dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); | ||
105 | 108 | ||
106 | dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); | ||
107 | nlm_release_host(host); | 109 | nlm_release_host(host); |
108 | nlm_release_file(file); | 110 | nlm_release_file(file); |
109 | return rpc_success; | 111 | return rc; |
110 | } | 112 | } |
111 | 113 | ||
112 | static __be32 | 114 | static __be32 |
@@ -115,6 +117,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
115 | { | 117 | { |
116 | struct nlm_host *host; | 118 | struct nlm_host *host; |
117 | struct nlm_file *file; | 119 | struct nlm_file *file; |
120 | int rc = rpc_success; | ||
118 | 121 | ||
119 | dprintk("lockd: LOCK called\n"); | 122 | dprintk("lockd: LOCK called\n"); |
120 | 123 | ||
@@ -123,7 +126,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
123 | /* Don't accept new lock requests during grace period */ | 126 | /* Don't accept new lock requests during grace period */ |
124 | if (nlmsvc_grace_period && !argp->reclaim) { | 127 | if (nlmsvc_grace_period && !argp->reclaim) { |
125 | resp->status = nlm_lck_denied_grace_period; | 128 | resp->status = nlm_lck_denied_grace_period; |
126 | return rpc_success; | 129 | return rc; |
127 | } | 130 | } |
128 | 131 | ||
129 | /* Obtain client and file */ | 132 | /* Obtain client and file */ |
@@ -146,12 +149,13 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
146 | resp->status = nlmsvc_lock(rqstp, file, &argp->lock, | 149 | resp->status = nlmsvc_lock(rqstp, file, &argp->lock, |
147 | argp->block, &argp->cookie); | 150 | argp->block, &argp->cookie); |
148 | if (resp->status == nlm_drop_reply) | 151 | if (resp->status == nlm_drop_reply) |
149 | return rpc_drop_reply; | 152 | rc = rpc_drop_reply; |
153 | else | ||
154 | dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); | ||
150 | 155 | ||
151 | dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); | ||
152 | nlm_release_host(host); | 156 | nlm_release_host(host); |
153 | nlm_release_file(file); | 157 | nlm_release_file(file); |
154 | return rpc_success; | 158 | return rc; |
155 | } | 159 | } |
156 | 160 | ||
157 | static __be32 | 161 | static __be32 |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index d120ec39bcb0..2f4d8fa66689 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -501,25 +501,29 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
501 | block, block->b_flags, block->b_fl); | 501 | block, block->b_flags, block->b_fl); |
502 | if (block->b_flags & B_TIMED_OUT) { | 502 | if (block->b_flags & B_TIMED_OUT) { |
503 | nlmsvc_unlink_block(block); | 503 | nlmsvc_unlink_block(block); |
504 | return nlm_lck_denied; | 504 | ret = nlm_lck_denied; |
505 | goto out; | ||
505 | } | 506 | } |
506 | if (block->b_flags & B_GOT_CALLBACK) { | 507 | if (block->b_flags & B_GOT_CALLBACK) { |
508 | nlmsvc_unlink_block(block); | ||
507 | if (block->b_fl != NULL | 509 | if (block->b_fl != NULL |
508 | && block->b_fl->fl_type != F_UNLCK) { | 510 | && block->b_fl->fl_type != F_UNLCK) { |
509 | lock->fl = *block->b_fl; | 511 | lock->fl = *block->b_fl; |
510 | goto conf_lock; | 512 | goto conf_lock; |
511 | } | 513 | } else { |
512 | else { | 514 | ret = nlm_granted; |
513 | nlmsvc_unlink_block(block); | 515 | goto out; |
514 | return nlm_granted; | ||
515 | } | 516 | } |
516 | } | 517 | } |
517 | return nlm_drop_reply; | 518 | ret = nlm_drop_reply; |
519 | goto out; | ||
518 | } | 520 | } |
519 | 521 | ||
520 | error = vfs_test_lock(file->f_file, &lock->fl); | 522 | error = vfs_test_lock(file->f_file, &lock->fl); |
521 | if (error == -EINPROGRESS) | 523 | if (error == -EINPROGRESS) { |
522 | return nlmsvc_defer_lock_rqst(rqstp, block); | 524 | ret = nlmsvc_defer_lock_rqst(rqstp, block); |
525 | goto out; | ||
526 | } | ||
523 | if (error) { | 527 | if (error) { |
524 | ret = nlm_lck_denied_nolocks; | 528 | ret = nlm_lck_denied_nolocks; |
525 | goto out; | 529 | goto out; |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 9cd5c8b37593..88379cc6e0b1 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
@@ -113,6 +113,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
113 | { | 113 | { |
114 | struct nlm_host *host; | 114 | struct nlm_host *host; |
115 | struct nlm_file *file; | 115 | struct nlm_file *file; |
116 | int rc = rpc_success; | ||
116 | 117 | ||
117 | dprintk("lockd: TEST called\n"); | 118 | dprintk("lockd: TEST called\n"); |
118 | resp->cookie = argp->cookie; | 119 | resp->cookie = argp->cookie; |
@@ -120,7 +121,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
120 | /* Don't accept test requests during grace period */ | 121 | /* Don't accept test requests during grace period */ |
121 | if (nlmsvc_grace_period) { | 122 | if (nlmsvc_grace_period) { |
122 | resp->status = nlm_lck_denied_grace_period; | 123 | resp->status = nlm_lck_denied_grace_period; |
123 | return rpc_success; | 124 | return rc; |
124 | } | 125 | } |
125 | 126 | ||
126 | /* Obtain client and file */ | 127 | /* Obtain client and file */ |
@@ -130,13 +131,14 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
130 | /* Now check for conflicting locks */ | 131 | /* Now check for conflicting locks */ |
131 | resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie)); | 132 | resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie)); |
132 | if (resp->status == nlm_drop_reply) | 133 | if (resp->status == nlm_drop_reply) |
133 | return rpc_drop_reply; | 134 | rc = rpc_drop_reply; |
135 | else | ||
136 | dprintk("lockd: TEST status %d vers %d\n", | ||
137 | ntohl(resp->status), rqstp->rq_vers); | ||
134 | 138 | ||
135 | dprintk("lockd: TEST status %d vers %d\n", | ||
136 | ntohl(resp->status), rqstp->rq_vers); | ||
137 | nlm_release_host(host); | 139 | nlm_release_host(host); |
138 | nlm_release_file(file); | 140 | nlm_release_file(file); |
139 | return rpc_success; | 141 | return rc; |
140 | } | 142 | } |
141 | 143 | ||
142 | static __be32 | 144 | static __be32 |
@@ -145,6 +147,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
145 | { | 147 | { |
146 | struct nlm_host *host; | 148 | struct nlm_host *host; |
147 | struct nlm_file *file; | 149 | struct nlm_file *file; |
150 | int rc = rpc_success; | ||
148 | 151 | ||
149 | dprintk("lockd: LOCK called\n"); | 152 | dprintk("lockd: LOCK called\n"); |
150 | 153 | ||
@@ -153,7 +156,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
153 | /* Don't accept new lock requests during grace period */ | 156 | /* Don't accept new lock requests during grace period */ |
154 | if (nlmsvc_grace_period && !argp->reclaim) { | 157 | if (nlmsvc_grace_period && !argp->reclaim) { |
155 | resp->status = nlm_lck_denied_grace_period; | 158 | resp->status = nlm_lck_denied_grace_period; |
156 | return rpc_success; | 159 | return rc; |
157 | } | 160 | } |
158 | 161 | ||
159 | /* Obtain client and file */ | 162 | /* Obtain client and file */ |
@@ -176,12 +179,13 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
176 | resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, | 179 | resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, |
177 | argp->block, &argp->cookie)); | 180 | argp->block, &argp->cookie)); |
178 | if (resp->status == nlm_drop_reply) | 181 | if (resp->status == nlm_drop_reply) |
179 | return rpc_drop_reply; | 182 | rc = rpc_drop_reply; |
183 | else | ||
184 | dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); | ||
180 | 185 | ||
181 | dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); | ||
182 | nlm_release_host(host); | 186 | nlm_release_host(host); |
183 | nlm_release_file(file); | 187 | nlm_release_file(file); |
184 | return rpc_success; | 188 | return rc; |
185 | } | 189 | } |
186 | 190 | ||
187 | static __be32 | 191 | static __be32 |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 84ebba33b98d..dbbefbcd6712 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -87,7 +87,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, | |||
87 | unsigned int hash; | 87 | unsigned int hash; |
88 | __be32 nfserr; | 88 | __be32 nfserr; |
89 | 89 | ||
90 | nlm_debug_print_fh("nlm_file_lookup", f); | 90 | nlm_debug_print_fh("nlm_lookup_file", f); |
91 | 91 | ||
92 | hash = file_hash(f); | 92 | hash = file_hash(f); |
93 | 93 | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 9b6bbf1b9787..bd185a572a23 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -119,8 +119,8 @@ int nfs_callback_up(void) | |||
119 | if (!serv) | 119 | if (!serv) |
120 | goto out_err; | 120 | goto out_err; |
121 | 121 | ||
122 | ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport, | 122 | ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport, |
123 | SVC_SOCK_ANONYMOUS); | 123 | SVC_SOCK_ANONYMOUS); |
124 | if (ret <= 0) | 124 | if (ret <= 0) |
125 | goto out_destroy; | 125 | goto out_destroy; |
126 | nfs_callback_tcpport = ret; | 126 | nfs_callback_tcpport = ret; |
diff --git a/include/linux/nfsd/auth.h b/fs/nfsd/auth.h index 0fb9f7212195..78b3c0e93822 100644 --- a/include/linux/nfsd/auth.h +++ b/fs/nfsd/auth.h | |||
@@ -1,6 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * include/linux/nfsd/auth.h | ||
3 | * | ||
4 | * nfsd-specific authentication stuff. | 2 | * nfsd-specific authentication stuff. |
5 | * uid/gid mapping not yet implemented. | 3 | * uid/gid mapping not yet implemented. |
6 | * | 4 | * |
@@ -10,8 +8,6 @@ | |||
10 | #ifndef LINUX_NFSD_AUTH_H | 8 | #ifndef LINUX_NFSD_AUTH_H |
11 | #define LINUX_NFSD_AUTH_H | 9 | #define LINUX_NFSD_AUTH_H |
12 | 10 | ||
13 | #ifdef __KERNEL__ | ||
14 | |||
15 | #define nfsd_luid(rq, uid) ((u32)(uid)) | 11 | #define nfsd_luid(rq, uid) ((u32)(uid)) |
16 | #define nfsd_lgid(rq, gid) ((u32)(gid)) | 12 | #define nfsd_lgid(rq, gid) ((u32)(gid)) |
17 | #define nfsd_ruid(rq, uid) ((u32)(uid)) | 13 | #define nfsd_ruid(rq, uid) ((u32)(uid)) |
@@ -23,5 +19,4 @@ | |||
23 | */ | 19 | */ |
24 | int nfsd_setuser(struct svc_rqst *, struct svc_export *); | 20 | int nfsd_setuser(struct svc_rqst *, struct svc_export *); |
25 | 21 | ||
26 | #endif /* __KERNEL__ */ | ||
27 | #endif /* LINUX_NFSD_AUTH_H */ | 22 | #endif /* LINUX_NFSD_AUTH_H */ |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 66d0aeb32a47..79b4bf812960 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -1357,8 +1357,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) | |||
1357 | mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); | 1357 | mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); |
1358 | 1358 | ||
1359 | exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); | 1359 | exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); |
1360 | if (PTR_ERR(exp) == -ENOENT) | ||
1361 | return nfserr_perm; | ||
1362 | if (IS_ERR(exp)) | 1360 | if (IS_ERR(exp)) |
1363 | return nfserrno(PTR_ERR(exp)); | 1361 | return nfserrno(PTR_ERR(exp)); |
1364 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); | 1362 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); |
@@ -1637,13 +1635,19 @@ exp_verify_string(char *cp, int max) | |||
1637 | /* | 1635 | /* |
1638 | * Initialize the exports module. | 1636 | * Initialize the exports module. |
1639 | */ | 1637 | */ |
1640 | void | 1638 | int |
1641 | nfsd_export_init(void) | 1639 | nfsd_export_init(void) |
1642 | { | 1640 | { |
1641 | int rv; | ||
1643 | dprintk("nfsd: initializing export module.\n"); | 1642 | dprintk("nfsd: initializing export module.\n"); |
1644 | 1643 | ||
1645 | cache_register(&svc_export_cache); | 1644 | rv = cache_register(&svc_export_cache); |
1646 | cache_register(&svc_expkey_cache); | 1645 | if (rv) |
1646 | return rv; | ||
1647 | rv = cache_register(&svc_expkey_cache); | ||
1648 | if (rv) | ||
1649 | cache_unregister(&svc_export_cache); | ||
1650 | return rv; | ||
1647 | 1651 | ||
1648 | } | 1652 | } |
1649 | 1653 | ||
@@ -1670,10 +1674,8 @@ nfsd_export_shutdown(void) | |||
1670 | 1674 | ||
1671 | exp_writelock(); | 1675 | exp_writelock(); |
1672 | 1676 | ||
1673 | if (cache_unregister(&svc_expkey_cache)) | 1677 | cache_unregister(&svc_expkey_cache); |
1674 | printk(KERN_ERR "nfsd: failed to unregister expkey cache\n"); | 1678 | cache_unregister(&svc_export_cache); |
1675 | if (cache_unregister(&svc_export_cache)) | ||
1676 | printk(KERN_ERR "nfsd: failed to unregister export cache\n"); | ||
1677 | svcauth_unix_purge(); | 1679 | svcauth_unix_purge(); |
1678 | 1680 | ||
1679 | exp_writeunlock(); | 1681 | exp_writeunlock(); |
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 0e5fa11e6b44..1c3b7654e966 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c | |||
@@ -221,12 +221,17 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, | |||
221 | struct nfsd3_getaclres *resp) | 221 | struct nfsd3_getaclres *resp) |
222 | { | 222 | { |
223 | struct dentry *dentry = resp->fh.fh_dentry; | 223 | struct dentry *dentry = resp->fh.fh_dentry; |
224 | struct inode *inode = dentry->d_inode; | 224 | struct inode *inode; |
225 | struct kvec *head = rqstp->rq_res.head; | 225 | struct kvec *head = rqstp->rq_res.head; |
226 | unsigned int base; | 226 | unsigned int base; |
227 | int n; | 227 | int n; |
228 | int w; | 228 | int w; |
229 | 229 | ||
230 | /* | ||
231 | * Since this is version 2, the check for nfserr in | ||
232 | * nfsd_dispatch actually ensures the following cannot happen. | ||
233 | * However, it seems fragile to depend on that. | ||
234 | */ | ||
230 | if (dentry == NULL || dentry->d_inode == NULL) | 235 | if (dentry == NULL || dentry->d_inode == NULL) |
231 | return 0; | 236 | return 0; |
232 | inode = dentry->d_inode; | 237 | inode = dentry->d_inode; |
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f917fd25858a..d7647f70e02b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/sunrpc/svc.h> | 21 | #include <linux/sunrpc/svc.h> |
22 | #include <linux/nfsd/nfsd.h> | 22 | #include <linux/nfsd/nfsd.h> |
23 | #include <linux/nfsd/xdr3.h> | 23 | #include <linux/nfsd/xdr3.h> |
24 | #include "auth.h" | ||
24 | 25 | ||
25 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 26 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
26 | 27 | ||
@@ -88,10 +89,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp) | |||
88 | * no slashes or null bytes. | 89 | * no slashes or null bytes. |
89 | */ | 90 | */ |
90 | static __be32 * | 91 | static __be32 * |
91 | decode_filename(__be32 *p, char **namp, int *lenp) | 92 | decode_filename(__be32 *p, char **namp, unsigned int *lenp) |
92 | { | 93 | { |
93 | char *name; | 94 | char *name; |
94 | int i; | 95 | unsigned int i; |
95 | 96 | ||
96 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { | 97 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { |
97 | for (i = 0, name = *namp; i < *lenp; i++, name++) { | 98 | for (i = 0, name = *namp; i < *lenp; i++, name++) { |
@@ -452,8 +453,7 @@ int | |||
452 | nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, | 453 | nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, |
453 | struct nfsd3_symlinkargs *args) | 454 | struct nfsd3_symlinkargs *args) |
454 | { | 455 | { |
455 | unsigned int len; | 456 | unsigned int len, avail; |
456 | int avail; | ||
457 | char *old, *new; | 457 | char *old, *new; |
458 | struct kvec *vec; | 458 | struct kvec *vec; |
459 | 459 | ||
@@ -486,7 +486,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, | |||
486 | /* now copy next page if there is one */ | 486 | /* now copy next page if there is one */ |
487 | if (len && !avail && rqstp->rq_arg.page_len) { | 487 | if (len && !avail && rqstp->rq_arg.page_len) { |
488 | avail = rqstp->rq_arg.page_len; | 488 | avail = rqstp->rq_arg.page_len; |
489 | if (avail > PAGE_SIZE) avail = PAGE_SIZE; | 489 | if (avail > PAGE_SIZE) |
490 | avail = PAGE_SIZE; | ||
490 | old = page_address(rqstp->rq_arg.pages[0]); | 491 | old = page_address(rqstp->rq_arg.pages[0]); |
491 | } | 492 | } |
492 | while (len && avail && *old) { | 493 | while (len && avail && *old) { |
@@ -816,11 +817,11 @@ static __be32 * | |||
816 | encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, | 817 | encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, |
817 | struct svc_fh *fhp) | 818 | struct svc_fh *fhp) |
818 | { | 819 | { |
819 | p = encode_post_op_attr(cd->rqstp, p, fhp); | 820 | p = encode_post_op_attr(cd->rqstp, p, fhp); |
820 | *p++ = xdr_one; /* yes, a file handle follows */ | 821 | *p++ = xdr_one; /* yes, a file handle follows */ |
821 | p = encode_fh(p, fhp); | 822 | p = encode_fh(p, fhp); |
822 | fh_put(fhp); | 823 | fh_put(fhp); |
823 | return p; | 824 | return p; |
824 | } | 825 | } |
825 | 826 | ||
826 | static int | 827 | static int |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 9d536a8cb379..aae2b29ae2c9 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -350,30 +350,6 @@ static struct rpc_version * nfs_cb_version[] = { | |||
350 | static int do_probe_callback(void *data) | 350 | static int do_probe_callback(void *data) |
351 | { | 351 | { |
352 | struct nfs4_client *clp = data; | 352 | struct nfs4_client *clp = data; |
353 | struct nfs4_callback *cb = &clp->cl_callback; | ||
354 | struct rpc_message msg = { | ||
355 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | ||
356 | .rpc_argp = clp, | ||
357 | }; | ||
358 | int status; | ||
359 | |||
360 | status = rpc_call_sync(cb->cb_client, &msg, RPC_TASK_SOFT); | ||
361 | |||
362 | if (status) { | ||
363 | rpc_shutdown_client(cb->cb_client); | ||
364 | cb->cb_client = NULL; | ||
365 | } else | ||
366 | atomic_set(&cb->cb_set, 1); | ||
367 | put_nfs4_client(clp); | ||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... | ||
373 | */ | ||
374 | void | ||
375 | nfsd4_probe_callback(struct nfs4_client *clp) | ||
376 | { | ||
377 | struct sockaddr_in addr; | 353 | struct sockaddr_in addr; |
378 | struct nfs4_callback *cb = &clp->cl_callback; | 354 | struct nfs4_callback *cb = &clp->cl_callback; |
379 | struct rpc_timeout timeparms = { | 355 | struct rpc_timeout timeparms = { |
@@ -390,13 +366,15 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
390 | .timeout = &timeparms, | 366 | .timeout = &timeparms, |
391 | .program = program, | 367 | .program = program, |
392 | .version = nfs_cb_version[1]->number, | 368 | .version = nfs_cb_version[1]->number, |
393 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ | 369 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ |
394 | .flags = (RPC_CLNT_CREATE_NOPING), | 370 | .flags = (RPC_CLNT_CREATE_NOPING), |
395 | }; | 371 | }; |
396 | struct task_struct *t; | 372 | struct rpc_message msg = { |
397 | 373 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | |
398 | if (atomic_read(&cb->cb_set)) | 374 | .rpc_argp = clp, |
399 | return; | 375 | }; |
376 | struct rpc_clnt *client; | ||
377 | int status; | ||
400 | 378 | ||
401 | /* Initialize address */ | 379 | /* Initialize address */ |
402 | memset(&addr, 0, sizeof(addr)); | 380 | memset(&addr, 0, sizeof(addr)); |
@@ -416,29 +394,50 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
416 | program->stats->program = program; | 394 | program->stats->program = program; |
417 | 395 | ||
418 | /* Create RPC client */ | 396 | /* Create RPC client */ |
419 | cb->cb_client = rpc_create(&args); | 397 | client = rpc_create(&args); |
420 | if (IS_ERR(cb->cb_client)) { | 398 | if (IS_ERR(client)) { |
421 | dprintk("NFSD: couldn't create callback client\n"); | 399 | dprintk("NFSD: couldn't create callback client\n"); |
400 | status = PTR_ERR(client); | ||
422 | goto out_err; | 401 | goto out_err; |
423 | } | 402 | } |
424 | 403 | ||
404 | status = rpc_call_sync(client, &msg, RPC_TASK_SOFT); | ||
405 | |||
406 | if (status) | ||
407 | goto out_release_client; | ||
408 | |||
409 | cb->cb_client = client; | ||
410 | atomic_set(&cb->cb_set, 1); | ||
411 | put_nfs4_client(clp); | ||
412 | return 0; | ||
413 | out_release_client: | ||
414 | rpc_shutdown_client(client); | ||
415 | out_err: | ||
416 | put_nfs4_client(clp); | ||
417 | dprintk("NFSD: warning: no callback path to client %.*s\n", | ||
418 | (int)clp->cl_name.len, clp->cl_name.data); | ||
419 | return status; | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... | ||
424 | */ | ||
425 | void | ||
426 | nfsd4_probe_callback(struct nfs4_client *clp) | ||
427 | { | ||
428 | struct task_struct *t; | ||
429 | |||
430 | BUG_ON(atomic_read(&clp->cl_callback.cb_set)); | ||
431 | |||
425 | /* the task holds a reference to the nfs4_client struct */ | 432 | /* the task holds a reference to the nfs4_client struct */ |
426 | atomic_inc(&clp->cl_count); | 433 | atomic_inc(&clp->cl_count); |
427 | 434 | ||
428 | t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe"); | 435 | t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe"); |
429 | 436 | ||
430 | if (IS_ERR(t)) | 437 | if (IS_ERR(t)) |
431 | goto out_release_clp; | 438 | atomic_dec(&clp->cl_count); |
432 | 439 | ||
433 | return; | 440 | return; |
434 | |||
435 | out_release_clp: | ||
436 | atomic_dec(&clp->cl_count); | ||
437 | rpc_shutdown_client(cb->cb_client); | ||
438 | out_err: | ||
439 | cb->cb_client = NULL; | ||
440 | dprintk("NFSD: warning: no callback path to client %.*s\n", | ||
441 | (int)clp->cl_name.len, clp->cl_name.data); | ||
442 | } | 441 | } |
443 | 442 | ||
444 | /* | 443 | /* |
@@ -458,9 +457,6 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
458 | int retries = 1; | 457 | int retries = 1; |
459 | int status = 0; | 458 | int status = 0; |
460 | 459 | ||
461 | if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt) | ||
462 | return; | ||
463 | |||
464 | cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ | 460 | cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ |
465 | cbr->cbr_dp = dp; | 461 | cbr->cbr_dp = dp; |
466 | 462 | ||
@@ -469,6 +465,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
469 | switch (status) { | 465 | switch (status) { |
470 | case -EIO: | 466 | case -EIO: |
471 | /* Network partition? */ | 467 | /* Network partition? */ |
468 | atomic_set(&clp->cl_callback.cb_set, 0); | ||
472 | case -EBADHANDLE: | 469 | case -EBADHANDLE: |
473 | case -NFS4ERR_BAD_STATEID: | 470 | case -NFS4ERR_BAD_STATEID: |
474 | /* Race: client probably got cb_recall | 471 | /* Race: client probably got cb_recall |
@@ -481,11 +478,10 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
481 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); | 478 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); |
482 | } | 479 | } |
483 | out_put_cred: | 480 | out_put_cred: |
484 | if (status == -EIO) | 481 | /* |
485 | atomic_set(&clp->cl_callback.cb_set, 0); | 482 | * Success or failure, now we're either waiting for lease expiration |
486 | /* Success or failure, now we're either waiting for lease expiration | 483 | * or deleg_return. |
487 | * or deleg_return. */ | 484 | */ |
488 | dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count)); | ||
489 | put_nfs4_client(clp); | 485 | put_nfs4_client(clp); |
490 | nfs4_put_delegation(dp); | 486 | nfs4_put_delegation(dp); |
491 | return; | 487 | return; |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 4c0c683ce07a..996bd88b75ba 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -255,13 +255,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) | |||
255 | goto out; | 255 | goto out; |
256 | if (len == 0) | 256 | if (len == 0) |
257 | set_bit(CACHE_NEGATIVE, &ent.h.flags); | 257 | set_bit(CACHE_NEGATIVE, &ent.h.flags); |
258 | else { | 258 | else if (len >= IDMAP_NAMESZ) |
259 | if (error >= IDMAP_NAMESZ) { | 259 | goto out; |
260 | error = -EINVAL; | 260 | else |
261 | goto out; | ||
262 | } | ||
263 | memcpy(ent.name, buf1, sizeof(ent.name)); | 261 | memcpy(ent.name, buf1, sizeof(ent.name)); |
264 | } | ||
265 | error = -ENOMEM; | 262 | error = -ENOMEM; |
266 | res = idtoname_update(&ent, res); | 263 | res = idtoname_update(&ent, res); |
267 | if (res == NULL) | 264 | if (res == NULL) |
@@ -467,20 +464,25 @@ nametoid_update(struct ent *new, struct ent *old) | |||
467 | * Exported API | 464 | * Exported API |
468 | */ | 465 | */ |
469 | 466 | ||
470 | void | 467 | int |
471 | nfsd_idmap_init(void) | 468 | nfsd_idmap_init(void) |
472 | { | 469 | { |
473 | cache_register(&idtoname_cache); | 470 | int rv; |
474 | cache_register(&nametoid_cache); | 471 | |
472 | rv = cache_register(&idtoname_cache); | ||
473 | if (rv) | ||
474 | return rv; | ||
475 | rv = cache_register(&nametoid_cache); | ||
476 | if (rv) | ||
477 | cache_unregister(&idtoname_cache); | ||
478 | return rv; | ||
475 | } | 479 | } |
476 | 480 | ||
477 | void | 481 | void |
478 | nfsd_idmap_shutdown(void) | 482 | nfsd_idmap_shutdown(void) |
479 | { | 483 | { |
480 | if (cache_unregister(&idtoname_cache)) | 484 | cache_unregister(&idtoname_cache); |
481 | printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n"); | 485 | cache_unregister(&nametoid_cache); |
482 | if (cache_unregister(&nametoid_cache)) | ||
483 | printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n"); | ||
484 | } | 486 | } |
485 | 487 | ||
486 | /* | 488 | /* |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 18ead1790bb3..c593db047d8b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -750,7 +750,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
750 | cstate->current_fh.fh_export, | 750 | cstate->current_fh.fh_export, |
751 | cstate->current_fh.fh_dentry, buf, | 751 | cstate->current_fh.fh_dentry, buf, |
752 | &count, verify->ve_bmval, | 752 | &count, verify->ve_bmval, |
753 | rqstp); | 753 | rqstp, 0); |
754 | 754 | ||
755 | /* this means that nfsd4_encode_fattr() ran out of space */ | 755 | /* this means that nfsd4_encode_fattr() ran out of space */ |
756 | if (status == nfserr_resource && count == 0) | 756 | if (status == nfserr_resource && count == 0) |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 31673cd251c3..f6744bc03dae 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -61,7 +61,6 @@ static time_t lease_time = 90; /* default lease time */ | |||
61 | static time_t user_lease_time = 90; | 61 | static time_t user_lease_time = 90; |
62 | static time_t boot_time; | 62 | static time_t boot_time; |
63 | static int in_grace = 1; | 63 | static int in_grace = 1; |
64 | static u32 current_clientid = 1; | ||
65 | static u32 current_ownerid = 1; | 64 | static u32 current_ownerid = 1; |
66 | static u32 current_fileid = 1; | 65 | static u32 current_fileid = 1; |
67 | static u32 current_delegid = 1; | 66 | static u32 current_delegid = 1; |
@@ -340,21 +339,20 @@ STALE_CLIENTID(clientid_t *clid) | |||
340 | * This type of memory management is somewhat inefficient, but we use it | 339 | * This type of memory management is somewhat inefficient, but we use it |
341 | * anyway since SETCLIENTID is not a common operation. | 340 | * anyway since SETCLIENTID is not a common operation. |
342 | */ | 341 | */ |
343 | static inline struct nfs4_client * | 342 | static struct nfs4_client *alloc_client(struct xdr_netobj name) |
344 | alloc_client(struct xdr_netobj name) | ||
345 | { | 343 | { |
346 | struct nfs4_client *clp; | 344 | struct nfs4_client *clp; |
347 | 345 | ||
348 | if ((clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) { | 346 | clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); |
349 | if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) { | 347 | if (clp == NULL) |
350 | memcpy(clp->cl_name.data, name.data, name.len); | 348 | return NULL; |
351 | clp->cl_name.len = name.len; | 349 | clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); |
352 | } | 350 | if (clp->cl_name.data == NULL) { |
353 | else { | 351 | kfree(clp); |
354 | kfree(clp); | 352 | return NULL; |
355 | clp = NULL; | ||
356 | } | ||
357 | } | 353 | } |
354 | memcpy(clp->cl_name.data, name.data, name.len); | ||
355 | clp->cl_name.len = name.len; | ||
358 | return clp; | 356 | return clp; |
359 | } | 357 | } |
360 | 358 | ||
@@ -363,8 +361,11 @@ shutdown_callback_client(struct nfs4_client *clp) | |||
363 | { | 361 | { |
364 | struct rpc_clnt *clnt = clp->cl_callback.cb_client; | 362 | struct rpc_clnt *clnt = clp->cl_callback.cb_client; |
365 | 363 | ||
366 | /* shutdown rpc client, ending any outstanding recall rpcs */ | ||
367 | if (clnt) { | 364 | if (clnt) { |
365 | /* | ||
366 | * Callback threads take a reference on the client, so there | ||
367 | * should be no outstanding callbacks at this point. | ||
368 | */ | ||
368 | clp->cl_callback.cb_client = NULL; | 369 | clp->cl_callback.cb_client = NULL; |
369 | rpc_shutdown_client(clnt); | 370 | rpc_shutdown_client(clnt); |
370 | } | 371 | } |
@@ -422,12 +423,13 @@ expire_client(struct nfs4_client *clp) | |||
422 | put_nfs4_client(clp); | 423 | put_nfs4_client(clp); |
423 | } | 424 | } |
424 | 425 | ||
425 | static struct nfs4_client * | 426 | static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) |
426 | create_client(struct xdr_netobj name, char *recdir) { | 427 | { |
427 | struct nfs4_client *clp; | 428 | struct nfs4_client *clp; |
428 | 429 | ||
429 | if (!(clp = alloc_client(name))) | 430 | clp = alloc_client(name); |
430 | goto out; | 431 | if (clp == NULL) |
432 | return NULL; | ||
431 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); | 433 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); |
432 | atomic_set(&clp->cl_count, 1); | 434 | atomic_set(&clp->cl_count, 1); |
433 | atomic_set(&clp->cl_callback.cb_set, 0); | 435 | atomic_set(&clp->cl_callback.cb_set, 0); |
@@ -436,32 +438,30 @@ create_client(struct xdr_netobj name, char *recdir) { | |||
436 | INIT_LIST_HEAD(&clp->cl_openowners); | 438 | INIT_LIST_HEAD(&clp->cl_openowners); |
437 | INIT_LIST_HEAD(&clp->cl_delegations); | 439 | INIT_LIST_HEAD(&clp->cl_delegations); |
438 | INIT_LIST_HEAD(&clp->cl_lru); | 440 | INIT_LIST_HEAD(&clp->cl_lru); |
439 | out: | ||
440 | return clp; | 441 | return clp; |
441 | } | 442 | } |
442 | 443 | ||
443 | static void | 444 | static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) |
444 | copy_verf(struct nfs4_client *target, nfs4_verifier *source) { | 445 | { |
445 | memcpy(target->cl_verifier.data, source->data, sizeof(target->cl_verifier.data)); | 446 | memcpy(target->cl_verifier.data, source->data, |
447 | sizeof(target->cl_verifier.data)); | ||
446 | } | 448 | } |
447 | 449 | ||
448 | static void | 450 | static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) |
449 | copy_clid(struct nfs4_client *target, struct nfs4_client *source) { | 451 | { |
450 | target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; | 452 | target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; |
451 | target->cl_clientid.cl_id = source->cl_clientid.cl_id; | 453 | target->cl_clientid.cl_id = source->cl_clientid.cl_id; |
452 | } | 454 | } |
453 | 455 | ||
454 | static void | 456 | static void copy_cred(struct svc_cred *target, struct svc_cred *source) |
455 | copy_cred(struct svc_cred *target, struct svc_cred *source) { | 457 | { |
456 | |||
457 | target->cr_uid = source->cr_uid; | 458 | target->cr_uid = source->cr_uid; |
458 | target->cr_gid = source->cr_gid; | 459 | target->cr_gid = source->cr_gid; |
459 | target->cr_group_info = source->cr_group_info; | 460 | target->cr_group_info = source->cr_group_info; |
460 | get_group_info(target->cr_group_info); | 461 | get_group_info(target->cr_group_info); |
461 | } | 462 | } |
462 | 463 | ||
463 | static inline int | 464 | static int same_name(const char *n1, const char *n2) |
464 | same_name(const char *n1, const char *n2) | ||
465 | { | 465 | { |
466 | return 0 == memcmp(n1, n2, HEXDIR_LEN); | 466 | return 0 == memcmp(n1, n2, HEXDIR_LEN); |
467 | } | 467 | } |
@@ -485,26 +485,26 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) | |||
485 | return cr1->cr_uid == cr2->cr_uid; | 485 | return cr1->cr_uid == cr2->cr_uid; |
486 | } | 486 | } |
487 | 487 | ||
488 | static void | 488 | static void gen_clid(struct nfs4_client *clp) |
489 | gen_clid(struct nfs4_client *clp) { | 489 | { |
490 | static u32 current_clientid = 1; | ||
491 | |||
490 | clp->cl_clientid.cl_boot = boot_time; | 492 | clp->cl_clientid.cl_boot = boot_time; |
491 | clp->cl_clientid.cl_id = current_clientid++; | 493 | clp->cl_clientid.cl_id = current_clientid++; |
492 | } | 494 | } |
493 | 495 | ||
494 | static void | 496 | static void gen_confirm(struct nfs4_client *clp) |
495 | gen_confirm(struct nfs4_client *clp) { | 497 | { |
496 | struct timespec tv; | 498 | static u32 i; |
497 | u32 * p; | 499 | u32 *p; |
498 | 500 | ||
499 | tv = CURRENT_TIME; | ||
500 | p = (u32 *)clp->cl_confirm.data; | 501 | p = (u32 *)clp->cl_confirm.data; |
501 | *p++ = tv.tv_sec; | 502 | *p++ = get_seconds(); |
502 | *p++ = tv.tv_nsec; | 503 | *p++ = i++; |
503 | } | 504 | } |
504 | 505 | ||
505 | static int | 506 | static int check_name(struct xdr_netobj name) |
506 | check_name(struct xdr_netobj name) { | 507 | { |
507 | |||
508 | if (name.len == 0) | 508 | if (name.len == 0) |
509 | return 0; | 509 | return 0; |
510 | if (name.len > NFS4_OPAQUE_LIMIT) { | 510 | if (name.len > NFS4_OPAQUE_LIMIT) { |
@@ -683,39 +683,6 @@ out_err: | |||
683 | return; | 683 | return; |
684 | } | 684 | } |
685 | 685 | ||
686 | /* | ||
687 | * RFC 3010 has a complex implmentation description of processing a | ||
688 | * SETCLIENTID request consisting of 5 bullets, labeled as | ||
689 | * CASE0 - CASE4 below. | ||
690 | * | ||
691 | * NOTES: | ||
692 | * callback information will be processed in a future patch | ||
693 | * | ||
694 | * an unconfirmed record is added when: | ||
695 | * NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record. | ||
696 | * CASE 1: confirmed record found with matching name, principal, | ||
697 | * verifier, and clientid. | ||
698 | * CASE 2: confirmed record found with matching name, principal, | ||
699 | * and there is no unconfirmed record with matching | ||
700 | * name and principal | ||
701 | * | ||
702 | * an unconfirmed record is replaced when: | ||
703 | * CASE 3: confirmed record found with matching name, principal, | ||
704 | * and an unconfirmed record is found with matching | ||
705 | * name, principal, and with clientid and | ||
706 | * confirm that does not match the confirmed record. | ||
707 | * CASE 4: there is no confirmed record with matching name and | ||
708 | * principal. there is an unconfirmed record with | ||
709 | * matching name, principal. | ||
710 | * | ||
711 | * an unconfirmed record is deleted when: | ||
712 | * CASE 1: an unconfirmed record that matches input name, verifier, | ||
713 | * and confirmed clientid. | ||
714 | * CASE 4: any unconfirmed records with matching name and principal | ||
715 | * that exist after an unconfirmed record has been replaced | ||
716 | * as described above. | ||
717 | * | ||
718 | */ | ||
719 | __be32 | 686 | __be32 |
720 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 687 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
721 | struct nfsd4_setclientid *setclid) | 688 | struct nfsd4_setclientid *setclid) |
@@ -748,11 +715,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
748 | nfs4_lock_state(); | 715 | nfs4_lock_state(); |
749 | conf = find_confirmed_client_by_str(dname, strhashval); | 716 | conf = find_confirmed_client_by_str(dname, strhashval); |
750 | if (conf) { | 717 | if (conf) { |
751 | /* | 718 | /* RFC 3530 14.2.33 CASE 0: */ |
752 | * CASE 0: | ||
753 | * clname match, confirmed, different principal | ||
754 | * or different ip_address | ||
755 | */ | ||
756 | status = nfserr_clid_inuse; | 719 | status = nfserr_clid_inuse; |
757 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) | 720 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) |
758 | || conf->cl_addr != sin->sin_addr.s_addr) { | 721 | || conf->cl_addr != sin->sin_addr.s_addr) { |
@@ -761,12 +724,17 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
761 | goto out; | 724 | goto out; |
762 | } | 725 | } |
763 | } | 726 | } |
727 | /* | ||
728 | * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION") | ||
729 | * has a description of SETCLIENTID request processing consisting | ||
730 | * of 5 bullet points, labeled as CASE0 - CASE4 below. | ||
731 | */ | ||
764 | unconf = find_unconfirmed_client_by_str(dname, strhashval); | 732 | unconf = find_unconfirmed_client_by_str(dname, strhashval); |
765 | status = nfserr_resource; | 733 | status = nfserr_resource; |
766 | if (!conf) { | 734 | if (!conf) { |
767 | /* | 735 | /* |
768 | * CASE 4: | 736 | * RFC 3530 14.2.33 CASE 4: |
769 | * placed first, because it is the normal case. | 737 | * placed first, because it is the normal case |
770 | */ | 738 | */ |
771 | if (unconf) | 739 | if (unconf) |
772 | expire_client(unconf); | 740 | expire_client(unconf); |
@@ -776,17 +744,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
776 | gen_clid(new); | 744 | gen_clid(new); |
777 | } else if (same_verf(&conf->cl_verifier, &clverifier)) { | 745 | } else if (same_verf(&conf->cl_verifier, &clverifier)) { |
778 | /* | 746 | /* |
779 | * CASE 1: | 747 | * RFC 3530 14.2.33 CASE 1: |
780 | * cl_name match, confirmed, principal match | 748 | * probable callback update |
781 | * verifier match: probable callback update | ||
782 | * | ||
783 | * remove any unconfirmed nfs4_client with | ||
784 | * matching cl_name, cl_verifier, and cl_clientid | ||
785 | * | ||
786 | * create and insert an unconfirmed nfs4_client with same | ||
787 | * cl_name, cl_verifier, and cl_clientid as existing | ||
788 | * nfs4_client, but with the new callback info and a | ||
789 | * new cl_confirm | ||
790 | */ | 749 | */ |
791 | if (unconf) { | 750 | if (unconf) { |
792 | /* Note this is removing unconfirmed {*x***}, | 751 | /* Note this is removing unconfirmed {*x***}, |
@@ -802,43 +761,25 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
802 | copy_clid(new, conf); | 761 | copy_clid(new, conf); |
803 | } else if (!unconf) { | 762 | } else if (!unconf) { |
804 | /* | 763 | /* |
805 | * CASE 2: | 764 | * RFC 3530 14.2.33 CASE 2: |
806 | * clname match, confirmed, principal match | 765 | * probable client reboot; state will be removed if |
807 | * verfier does not match | 766 | * confirmed. |
808 | * no unconfirmed. create a new unconfirmed nfs4_client | ||
809 | * using input clverifier, clname, and callback info | ||
810 | * and generate a new cl_clientid and cl_confirm. | ||
811 | */ | 767 | */ |
812 | new = create_client(clname, dname); | 768 | new = create_client(clname, dname); |
813 | if (new == NULL) | 769 | if (new == NULL) |
814 | goto out; | 770 | goto out; |
815 | gen_clid(new); | 771 | gen_clid(new); |
816 | } else if (!same_verf(&conf->cl_confirm, &unconf->cl_confirm)) { | 772 | } else { |
817 | /* | 773 | /* |
818 | * CASE3: | 774 | * RFC 3530 14.2.33 CASE 3: |
819 | * confirmed found (name, principal match) | 775 | * probable client reboot; state will be removed if |
820 | * confirmed verifier does not match input clverifier | 776 | * confirmed. |
821 | * | ||
822 | * unconfirmed found (name match) | ||
823 | * confirmed->cl_confirm != unconfirmed->cl_confirm | ||
824 | * | ||
825 | * remove unconfirmed. | ||
826 | * | ||
827 | * create an unconfirmed nfs4_client | ||
828 | * with same cl_name as existing confirmed nfs4_client, | ||
829 | * but with new callback info, new cl_clientid, | ||
830 | * new cl_verifier and a new cl_confirm | ||
831 | */ | 777 | */ |
832 | expire_client(unconf); | 778 | expire_client(unconf); |
833 | new = create_client(clname, dname); | 779 | new = create_client(clname, dname); |
834 | if (new == NULL) | 780 | if (new == NULL) |
835 | goto out; | 781 | goto out; |
836 | gen_clid(new); | 782 | gen_clid(new); |
837 | } else { | ||
838 | /* No cases hit !!! */ | ||
839 | status = nfserr_inval; | ||
840 | goto out; | ||
841 | |||
842 | } | 783 | } |
843 | copy_verf(new, &clverifier); | 784 | copy_verf(new, &clverifier); |
844 | new->cl_addr = sin->sin_addr.s_addr; | 785 | new->cl_addr = sin->sin_addr.s_addr; |
@@ -857,11 +798,9 @@ out: | |||
857 | 798 | ||
858 | 799 | ||
859 | /* | 800 | /* |
860 | * RFC 3010 has a complex implmentation description of processing a | 801 | * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has |
861 | * SETCLIENTID_CONFIRM request consisting of 4 bullets describing | 802 | * a description of SETCLIENTID_CONFIRM request processing consisting of 4 |
862 | * processing on a DRC miss, labeled as CASE1 - CASE4 below. | 803 | * bullets, labeled as CASE1 - CASE4 below. |
863 | * | ||
864 | * NOTE: callback information will be processed here in a future patch | ||
865 | */ | 804 | */ |
866 | __be32 | 805 | __be32 |
867 | nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | 806 | nfsd4_setclientid_confirm(struct svc_rqst *rqstp, |
@@ -892,16 +831,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
892 | if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) | 831 | if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) |
893 | goto out; | 832 | goto out; |
894 | 833 | ||
895 | if ((conf && unconf) && | 834 | /* |
896 | (same_verf(&unconf->cl_confirm, &confirm)) && | 835 | * section 14.2.34 of RFC 3530 has a description of |
897 | (same_verf(&conf->cl_verifier, &unconf->cl_verifier)) && | 836 | * SETCLIENTID_CONFIRM request processing consisting |
898 | (same_name(conf->cl_recdir,unconf->cl_recdir)) && | 837 | * of 4 bullet points, labeled as CASE1 - CASE4 below. |
899 | (!same_verf(&conf->cl_confirm, &unconf->cl_confirm))) { | 838 | */ |
900 | /* CASE 1: | 839 | if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) { |
901 | * unconf record that matches input clientid and input confirm. | 840 | /* |
902 | * conf record that matches input clientid. | 841 | * RFC 3530 14.2.34 CASE 1: |
903 | * conf and unconf records match names, verifiers | 842 | * callback update |
904 | */ | 843 | */ |
905 | if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) | 844 | if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) |
906 | status = nfserr_clid_inuse; | 845 | status = nfserr_clid_inuse; |
907 | else { | 846 | else { |
@@ -914,15 +853,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
914 | status = nfs_ok; | 853 | status = nfs_ok; |
915 | 854 | ||
916 | } | 855 | } |
917 | } else if ((conf && !unconf) || | 856 | } else if (conf && !unconf) { |
918 | ((conf && unconf) && | 857 | /* |
919 | (!same_verf(&conf->cl_verifier, &unconf->cl_verifier) || | 858 | * RFC 3530 14.2.34 CASE 2: |
920 | !same_name(conf->cl_recdir, unconf->cl_recdir)))) { | 859 | * probable retransmitted request; play it safe and |
921 | /* CASE 2: | 860 | * do nothing. |
922 | * conf record that matches input clientid. | ||
923 | * if unconf record matches input clientid, then | ||
924 | * unconf->cl_name or unconf->cl_verifier don't match the | ||
925 | * conf record. | ||
926 | */ | 861 | */ |
927 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) | 862 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) |
928 | status = nfserr_clid_inuse; | 863 | status = nfserr_clid_inuse; |
@@ -930,10 +865,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
930 | status = nfs_ok; | 865 | status = nfs_ok; |
931 | } else if (!conf && unconf | 866 | } else if (!conf && unconf |
932 | && same_verf(&unconf->cl_confirm, &confirm)) { | 867 | && same_verf(&unconf->cl_confirm, &confirm)) { |
933 | /* CASE 3: | 868 | /* |
934 | * conf record not found. | 869 | * RFC 3530 14.2.34 CASE 3: |
935 | * unconf record found. | 870 | * Normal case; new or rebooted client: |
936 | * unconf->cl_confirm matches input confirm | ||
937 | */ | 871 | */ |
938 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { | 872 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { |
939 | status = nfserr_clid_inuse; | 873 | status = nfserr_clid_inuse; |
@@ -948,16 +882,15 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
948 | } | 882 | } |
949 | move_to_confirmed(unconf); | 883 | move_to_confirmed(unconf); |
950 | conf = unconf; | 884 | conf = unconf; |
885 | nfsd4_probe_callback(conf); | ||
951 | status = nfs_ok; | 886 | status = nfs_ok; |
952 | } | 887 | } |
953 | } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) | 888 | } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) |
954 | && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, | 889 | && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, |
955 | &confirm)))) { | 890 | &confirm)))) { |
956 | /* CASE 4: | 891 | /* |
957 | * conf record not found, or if conf, conf->cl_confirm does not | 892 | * RFC 3530 14.2.34 CASE 4: |
958 | * match input confirm. | 893 | * Client probably hasn't noticed that we rebooted yet. |
959 | * unconf record not found, or if unconf, unconf->cl_confirm | ||
960 | * does not match input confirm. | ||
961 | */ | 894 | */ |
962 | status = nfserr_stale_clientid; | 895 | status = nfserr_stale_clientid; |
963 | } else { | 896 | } else { |
@@ -965,8 +898,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
965 | status = nfserr_clid_inuse; | 898 | status = nfserr_clid_inuse; |
966 | } | 899 | } |
967 | out: | 900 | out: |
968 | if (!status) | ||
969 | nfsd4_probe_callback(conf); | ||
970 | nfs4_unlock_state(); | 901 | nfs4_unlock_state(); |
971 | return status; | 902 | return status; |
972 | } | 903 | } |
@@ -1226,14 +1157,19 @@ find_file(struct inode *ino) | |||
1226 | return NULL; | 1157 | return NULL; |
1227 | } | 1158 | } |
1228 | 1159 | ||
1229 | static int access_valid(u32 x) | 1160 | static inline int access_valid(u32 x) |
1230 | { | 1161 | { |
1231 | return (x > 0 && x < 4); | 1162 | if (x < NFS4_SHARE_ACCESS_READ) |
1163 | return 0; | ||
1164 | if (x > NFS4_SHARE_ACCESS_BOTH) | ||
1165 | return 0; | ||
1166 | return 1; | ||
1232 | } | 1167 | } |
1233 | 1168 | ||
1234 | static int deny_valid(u32 x) | 1169 | static inline int deny_valid(u32 x) |
1235 | { | 1170 | { |
1236 | return (x >= 0 && x < 5); | 1171 | /* Note: unlike access bits, deny bits may be zero. */ |
1172 | return x <= NFS4_SHARE_DENY_BOTH; | ||
1237 | } | 1173 | } |
1238 | 1174 | ||
1239 | static void | 1175 | static void |
@@ -2162,8 +2098,10 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2162 | goto check_replay; | 2098 | goto check_replay; |
2163 | } | 2099 | } |
2164 | 2100 | ||
2101 | *stpp = stp; | ||
2102 | *sopp = sop = stp->st_stateowner; | ||
2103 | |||
2165 | if (lock) { | 2104 | if (lock) { |
2166 | struct nfs4_stateowner *sop = stp->st_stateowner; | ||
2167 | clientid_t *lockclid = &lock->v.new.clientid; | 2105 | clientid_t *lockclid = &lock->v.new.clientid; |
2168 | struct nfs4_client *clp = sop->so_client; | 2106 | struct nfs4_client *clp = sop->so_client; |
2169 | int lkflg = 0; | 2107 | int lkflg = 0; |
@@ -2193,9 +2131,6 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
2193 | return nfserr_bad_stateid; | 2131 | return nfserr_bad_stateid; |
2194 | } | 2132 | } |
2195 | 2133 | ||
2196 | *stpp = stp; | ||
2197 | *sopp = sop = stp->st_stateowner; | ||
2198 | |||
2199 | /* | 2134 | /* |
2200 | * We now validate the seqid and stateid generation numbers. | 2135 | * We now validate the seqid and stateid generation numbers. |
2201 | * For the moment, we ignore the possibility of | 2136 | * For the moment, we ignore the possibility of |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 57333944af7f..b0592e7c378d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -148,12 +148,12 @@ xdr_error: \ | |||
148 | } \ | 148 | } \ |
149 | } while (0) | 149 | } while (0) |
150 | 150 | ||
151 | static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) | 151 | static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) |
152 | { | 152 | { |
153 | /* We want more bytes than seem to be available. | 153 | /* We want more bytes than seem to be available. |
154 | * Maybe we need a new page, maybe we have just run out | 154 | * Maybe we need a new page, maybe we have just run out |
155 | */ | 155 | */ |
156 | int avail = (char*)argp->end - (char*)argp->p; | 156 | unsigned int avail = (char *)argp->end - (char *)argp->p; |
157 | __be32 *p; | 157 | __be32 *p; |
158 | if (avail + argp->pagelen < nbytes) | 158 | if (avail + argp->pagelen < nbytes) |
159 | return NULL; | 159 | return NULL; |
@@ -169,6 +169,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) | |||
169 | return NULL; | 169 | return NULL; |
170 | 170 | ||
171 | } | 171 | } |
172 | /* | ||
173 | * The following memcpy is safe because read_buf is always | ||
174 | * called with nbytes > avail, and the two cases above both | ||
175 | * guarantee p points to at least nbytes bytes. | ||
176 | */ | ||
172 | memcpy(p, argp->p, avail); | 177 | memcpy(p, argp->p, avail); |
173 | /* step to next page */ | 178 | /* step to next page */ |
174 | argp->p = page_address(argp->pagelist[0]); | 179 | argp->p = page_address(argp->pagelist[0]); |
@@ -1448,7 +1453,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) | |||
1448 | __be32 | 1453 | __be32 |
1449 | nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | 1454 | nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, |
1450 | struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, | 1455 | struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, |
1451 | struct svc_rqst *rqstp) | 1456 | struct svc_rqst *rqstp, int ignore_crossmnt) |
1452 | { | 1457 | { |
1453 | u32 bmval0 = bmval[0]; | 1458 | u32 bmval0 = bmval[0]; |
1454 | u32 bmval1 = bmval[1]; | 1459 | u32 bmval1 = bmval[1]; |
@@ -1828,7 +1833,12 @@ out_acl: | |||
1828 | if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { | 1833 | if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { |
1829 | if ((buflen -= 8) < 0) | 1834 | if ((buflen -= 8) < 0) |
1830 | goto out_resource; | 1835 | goto out_resource; |
1831 | if (exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) { | 1836 | /* |
1837 | * Get parent's attributes if not ignoring crossmount | ||
1838 | * and this is the root of a cross-mounted filesystem. | ||
1839 | */ | ||
1840 | if (ignore_crossmnt == 0 && | ||
1841 | exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) { | ||
1832 | err = vfs_getattr(exp->ex_mnt->mnt_parent, | 1842 | err = vfs_getattr(exp->ex_mnt->mnt_parent, |
1833 | exp->ex_mnt->mnt_mountpoint, &stat); | 1843 | exp->ex_mnt->mnt_mountpoint, &stat); |
1834 | if (err) | 1844 | if (err) |
@@ -1864,13 +1874,25 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | |||
1864 | struct svc_export *exp = cd->rd_fhp->fh_export; | 1874 | struct svc_export *exp = cd->rd_fhp->fh_export; |
1865 | struct dentry *dentry; | 1875 | struct dentry *dentry; |
1866 | __be32 nfserr; | 1876 | __be32 nfserr; |
1877 | int ignore_crossmnt = 0; | ||
1867 | 1878 | ||
1868 | dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); | 1879 | dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); |
1869 | if (IS_ERR(dentry)) | 1880 | if (IS_ERR(dentry)) |
1870 | return nfserrno(PTR_ERR(dentry)); | 1881 | return nfserrno(PTR_ERR(dentry)); |
1871 | 1882 | ||
1872 | exp_get(exp); | 1883 | exp_get(exp); |
1873 | if (d_mountpoint(dentry)) { | 1884 | /* |
1885 | * In the case of a mountpoint, the client may be asking for | ||
1886 | * attributes that are only properties of the underlying filesystem | ||
1887 | * as opposed to the cross-mounted file system. In such a case, | ||
1889 | * we will not follow the cross mount and will fill the attributes | ||
1889 | * directly from the mountpoint dentry. | ||
1890 | */ | ||
1891 | if (d_mountpoint(dentry) && | ||
1892 | (cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 && | ||
1893 | (cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0) | ||
1894 | ignore_crossmnt = 1; | ||
1895 | else if (d_mountpoint(dentry)) { | ||
1874 | int err; | 1896 | int err; |
1875 | 1897 | ||
1876 | /* | 1898 | /* |
@@ -1889,7 +1911,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | |||
1889 | 1911 | ||
1890 | } | 1912 | } |
1891 | nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, | 1913 | nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, |
1892 | cd->rd_rqstp); | 1914 | cd->rd_rqstp, ignore_crossmnt); |
1893 | out_put: | 1915 | out_put: |
1894 | dput(dentry); | 1916 | dput(dentry); |
1895 | exp_put(exp); | 1917 | exp_put(exp); |
@@ -2043,7 +2065,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 | |||
2043 | buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); | 2065 | buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); |
2044 | nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, | 2066 | nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, |
2045 | resp->p, &buflen, getattr->ga_bmval, | 2067 | resp->p, &buflen, getattr->ga_bmval, |
2046 | resp->rqstp); | 2068 | resp->rqstp, 0); |
2047 | if (!nfserr) | 2069 | if (!nfserr) |
2048 | resp->p += buflen; | 2070 | resp->p += buflen; |
2049 | return nfserr; | 2071 | return nfserr; |
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 578f2c9d56be..5bfc2ac60d54 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
@@ -44,17 +44,17 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); | |||
44 | */ | 44 | */ |
45 | static DEFINE_SPINLOCK(cache_lock); | 45 | static DEFINE_SPINLOCK(cache_lock); |
46 | 46 | ||
47 | void | 47 | int nfsd_reply_cache_init(void) |
48 | nfsd_cache_init(void) | ||
49 | { | 48 | { |
50 | struct svc_cacherep *rp; | 49 | struct svc_cacherep *rp; |
51 | int i; | 50 | int i; |
52 | 51 | ||
53 | INIT_LIST_HEAD(&lru_head); | 52 | INIT_LIST_HEAD(&lru_head); |
54 | i = CACHESIZE; | 53 | i = CACHESIZE; |
55 | while(i) { | 54 | while (i) { |
56 | rp = kmalloc(sizeof(*rp), GFP_KERNEL); | 55 | rp = kmalloc(sizeof(*rp), GFP_KERNEL); |
57 | if (!rp) break; | 56 | if (!rp) |
57 | goto out_nomem; | ||
58 | list_add(&rp->c_lru, &lru_head); | 58 | list_add(&rp->c_lru, &lru_head); |
59 | rp->c_state = RC_UNUSED; | 59 | rp->c_state = RC_UNUSED; |
60 | rp->c_type = RC_NOCACHE; | 60 | rp->c_type = RC_NOCACHE; |
@@ -62,23 +62,19 @@ nfsd_cache_init(void) | |||
62 | i--; | 62 | i--; |
63 | } | 63 | } |
64 | 64 | ||
65 | if (i) | ||
66 | printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n", | ||
67 | CACHESIZE, CACHESIZE-i); | ||
68 | |||
69 | hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); | 65 | hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); |
70 | if (!hash_list) { | 66 | if (!hash_list) |
71 | nfsd_cache_shutdown(); | 67 | goto out_nomem; |
72 | printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n", | ||
73 | HASHSIZE * sizeof(struct hlist_head)); | ||
74 | return; | ||
75 | } | ||
76 | 68 | ||
77 | cache_disabled = 0; | 69 | cache_disabled = 0; |
70 | return 0; | ||
71 | out_nomem: | ||
72 | printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); | ||
73 | nfsd_reply_cache_shutdown(); | ||
74 | return -ENOMEM; | ||
78 | } | 75 | } |
79 | 76 | ||
80 | void | 77 | void nfsd_reply_cache_shutdown(void) |
81 | nfsd_cache_shutdown(void) | ||
82 | { | 78 | { |
83 | struct svc_cacherep *rp; | 79 | struct svc_cacherep *rp; |
84 | 80 | ||
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 77dc9893b7ba..8516137cdbb0 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -304,6 +304,9 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) | |||
304 | struct auth_domain *dom; | 304 | struct auth_domain *dom; |
305 | struct knfsd_fh fh; | 305 | struct knfsd_fh fh; |
306 | 306 | ||
307 | if (size == 0) | ||
308 | return -EINVAL; | ||
309 | |||
307 | if (buf[size-1] != '\n') | 310 | if (buf[size-1] != '\n') |
308 | return -EINVAL; | 311 | return -EINVAL; |
309 | buf[size-1] = 0; | 312 | buf[size-1] = 0; |
@@ -503,7 +506,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) | |||
503 | int len = 0; | 506 | int len = 0; |
504 | lock_kernel(); | 507 | lock_kernel(); |
505 | if (nfsd_serv) | 508 | if (nfsd_serv) |
506 | len = svc_sock_names(buf, nfsd_serv, NULL); | 509 | len = svc_xprt_names(nfsd_serv, buf, 0); |
507 | unlock_kernel(); | 510 | unlock_kernel(); |
508 | return len; | 511 | return len; |
509 | } | 512 | } |
@@ -540,7 +543,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) | |||
540 | } | 543 | } |
541 | return err < 0 ? err : 0; | 544 | return err < 0 ? err : 0; |
542 | } | 545 | } |
543 | if (buf[0] == '-') { | 546 | if (buf[0] == '-' && isdigit(buf[1])) { |
544 | char *toclose = kstrdup(buf+1, GFP_KERNEL); | 547 | char *toclose = kstrdup(buf+1, GFP_KERNEL); |
545 | int len = 0; | 548 | int len = 0; |
546 | if (!toclose) | 549 | if (!toclose) |
@@ -554,6 +557,53 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) | |||
554 | kfree(toclose); | 557 | kfree(toclose); |
555 | return len; | 558 | return len; |
556 | } | 559 | } |
560 | /* | ||
561 | * Add a transport listener by writing its transport name | ||
562 | */ | ||
563 | if (isalpha(buf[0])) { | ||
564 | int err; | ||
565 | char transport[16]; | ||
566 | int port; | ||
567 | if (sscanf(buf, "%15s %4d", transport, &port) == 2) { | ||
568 | err = nfsd_create_serv(); | ||
569 | if (!err) { | ||
570 | err = svc_create_xprt(nfsd_serv, | ||
571 | transport, port, | ||
572 | SVC_SOCK_ANONYMOUS); | ||
573 | if (err == -ENOENT) | ||
574 | /* Give a reasonable perror msg for | ||
575 | * bad transport string */ | ||
576 | err = -EPROTONOSUPPORT; | ||
577 | } | ||
578 | return err < 0 ? err : 0; | ||
579 | } | ||
580 | } | ||
581 | /* | ||
582 | * Remove a transport by writing its transport name and port number | ||
583 | */ | ||
584 | if (buf[0] == '-' && isalpha(buf[1])) { | ||
585 | struct svc_xprt *xprt; | ||
586 | int err = -EINVAL; | ||
587 | char transport[16]; | ||
588 | int port; | ||
589 | if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { | ||
590 | if (port == 0) | ||
591 | return -EINVAL; | ||
592 | lock_kernel(); | ||
593 | if (nfsd_serv) { | ||
594 | xprt = svc_find_xprt(nfsd_serv, transport, | ||
595 | AF_UNSPEC, port); | ||
596 | if (xprt) { | ||
597 | svc_close_xprt(xprt); | ||
598 | svc_xprt_put(xprt); | ||
599 | err = 0; | ||
600 | } else | ||
601 | err = -ENOTCONN; | ||
602 | } | ||
603 | unlock_kernel(); | ||
604 | return err < 0 ? err : 0; | ||
605 | } | ||
606 | } | ||
557 | return -EINVAL; | 607 | return -EINVAL; |
558 | } | 608 | } |
559 | 609 | ||
@@ -616,7 +666,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) | |||
616 | char *recdir; | 666 | char *recdir; |
617 | int len, status; | 667 | int len, status; |
618 | 668 | ||
619 | if (size > PATH_MAX || buf[size-1] != '\n') | 669 | if (size == 0 || size > PATH_MAX || buf[size-1] != '\n') |
620 | return -EINVAL; | 670 | return -EINVAL; |
621 | buf[size-1] = 0; | 671 | buf[size-1] = 0; |
622 | 672 | ||
@@ -674,6 +724,27 @@ static struct file_system_type nfsd_fs_type = { | |||
674 | .kill_sb = kill_litter_super, | 724 | .kill_sb = kill_litter_super, |
675 | }; | 725 | }; |
676 | 726 | ||
727 | #ifdef CONFIG_PROC_FS | ||
728 | static int create_proc_exports_entry(void) | ||
729 | { | ||
730 | struct proc_dir_entry *entry; | ||
731 | |||
732 | entry = proc_mkdir("fs/nfs", NULL); | ||
733 | if (!entry) | ||
734 | return -ENOMEM; | ||
735 | entry = create_proc_entry("fs/nfs/exports", 0, NULL); | ||
736 | if (!entry) | ||
737 | return -ENOMEM; | ||
738 | entry->proc_fops = &exports_operations; | ||
739 | return 0; | ||
740 | } | ||
741 | #else /* CONFIG_PROC_FS */ | ||
742 | static int create_proc_exports_entry(void) | ||
743 | { | ||
744 | return 0; | ||
745 | } | ||
746 | #endif | ||
747 | |||
677 | static int __init init_nfsd(void) | 748 | static int __init init_nfsd(void) |
678 | { | 749 | { |
679 | int retval; | 750 | int retval; |
@@ -683,32 +754,43 @@ static int __init init_nfsd(void) | |||
683 | if (retval) | 754 | if (retval) |
684 | return retval; | 755 | return retval; |
685 | nfsd_stat_init(); /* Statistics */ | 756 | nfsd_stat_init(); /* Statistics */ |
686 | nfsd_cache_init(); /* RPC reply cache */ | 757 | retval = nfsd_reply_cache_init(); |
687 | nfsd_export_init(); /* Exports table */ | 758 | if (retval) |
759 | goto out_free_stat; | ||
760 | retval = nfsd_export_init(); | ||
761 | if (retval) | ||
762 | goto out_free_cache; | ||
688 | nfsd_lockd_init(); /* lockd->nfsd callbacks */ | 763 | nfsd_lockd_init(); /* lockd->nfsd callbacks */ |
689 | nfsd_idmap_init(); /* Name to ID mapping */ | 764 | retval = nfsd_idmap_init(); |
690 | if (proc_mkdir("fs/nfs", NULL)) { | 765 | if (retval) |
691 | struct proc_dir_entry *entry; | 766 | goto out_free_lockd; |
692 | entry = create_proc_entry("fs/nfs/exports", 0, NULL); | 767 | retval = create_proc_exports_entry(); |
693 | if (entry) | 768 | if (retval) |
694 | entry->proc_fops = &exports_operations; | 769 | goto out_free_idmap; |
695 | } | ||
696 | retval = register_filesystem(&nfsd_fs_type); | 770 | retval = register_filesystem(&nfsd_fs_type); |
697 | if (retval) { | 771 | if (retval) |
698 | nfsd_export_shutdown(); | 772 | goto out_free_all; |
699 | nfsd_cache_shutdown(); | 773 | return 0; |
700 | remove_proc_entry("fs/nfs/exports", NULL); | 774 | out_free_all: |
701 | remove_proc_entry("fs/nfs", NULL); | 775 | remove_proc_entry("fs/nfs/exports", NULL); |
702 | nfsd_stat_shutdown(); | 776 | remove_proc_entry("fs/nfs", NULL); |
703 | nfsd_lockd_shutdown(); | 777 | out_free_idmap: |
704 | } | 778 | nfsd_idmap_shutdown(); |
779 | out_free_lockd: | ||
780 | nfsd_lockd_shutdown(); | ||
781 | nfsd_export_shutdown(); | ||
782 | out_free_cache: | ||
783 | nfsd_reply_cache_shutdown(); | ||
784 | out_free_stat: | ||
785 | nfsd_stat_shutdown(); | ||
786 | nfsd4_free_slabs(); | ||
705 | return retval; | 787 | return retval; |
706 | } | 788 | } |
707 | 789 | ||
708 | static void __exit exit_nfsd(void) | 790 | static void __exit exit_nfsd(void) |
709 | { | 791 | { |
710 | nfsd_export_shutdown(); | 792 | nfsd_export_shutdown(); |
711 | nfsd_cache_shutdown(); | 793 | nfsd_reply_cache_shutdown(); |
712 | remove_proc_entry("fs/nfs/exports", NULL); | 794 | remove_proc_entry("fs/nfs/exports", NULL); |
713 | remove_proc_entry("fs/nfs", NULL); | 795 | remove_proc_entry("fs/nfs", NULL); |
714 | nfsd_stat_shutdown(); | 796 | nfsd_stat_shutdown(); |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 468f17a78441..8fbd2dc08a92 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/sunrpc/svc.h> | 22 | #include <linux/sunrpc/svc.h> |
23 | #include <linux/sunrpc/svcauth_gss.h> | 23 | #include <linux/sunrpc/svcauth_gss.h> |
24 | #include <linux/nfsd/nfsd.h> | 24 | #include <linux/nfsd/nfsd.h> |
25 | #include "auth.h" | ||
25 | 26 | ||
26 | #define NFSDDBG_FACILITY NFSDDBG_FH | 27 | #define NFSDDBG_FACILITY NFSDDBG_FH |
27 | 28 | ||
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1190aeaa92be..9647b0f7bc0c 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -155,8 +155,8 @@ static int killsig; /* signal that was used to kill last nfsd */ | |||
155 | static void nfsd_last_thread(struct svc_serv *serv) | 155 | static void nfsd_last_thread(struct svc_serv *serv) |
156 | { | 156 | { |
157 | /* When last nfsd thread exits we need to do some clean-up */ | 157 | /* When last nfsd thread exits we need to do some clean-up */ |
158 | struct svc_sock *svsk; | 158 | struct svc_xprt *xprt; |
159 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) | 159 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) |
160 | lockd_down(); | 160 | lockd_down(); |
161 | nfsd_serv = NULL; | 161 | nfsd_serv = NULL; |
162 | nfsd_racache_shutdown(); | 162 | nfsd_racache_shutdown(); |
@@ -236,7 +236,7 @@ static int nfsd_init_socks(int port) | |||
236 | 236 | ||
237 | error = lockd_up(IPPROTO_UDP); | 237 | error = lockd_up(IPPROTO_UDP); |
238 | if (error >= 0) { | 238 | if (error >= 0) { |
239 | error = svc_makesock(nfsd_serv, IPPROTO_UDP, port, | 239 | error = svc_create_xprt(nfsd_serv, "udp", port, |
240 | SVC_SOCK_DEFAULTS); | 240 | SVC_SOCK_DEFAULTS); |
241 | if (error < 0) | 241 | if (error < 0) |
242 | lockd_down(); | 242 | lockd_down(); |
@@ -247,7 +247,7 @@ static int nfsd_init_socks(int port) | |||
247 | #ifdef CONFIG_NFSD_TCP | 247 | #ifdef CONFIG_NFSD_TCP |
248 | error = lockd_up(IPPROTO_TCP); | 248 | error = lockd_up(IPPROTO_TCP); |
249 | if (error >= 0) { | 249 | if (error >= 0) { |
250 | error = svc_makesock(nfsd_serv, IPPROTO_TCP, port, | 250 | error = svc_create_xprt(nfsd_serv, "tcp", port, |
251 | SVC_SOCK_DEFAULTS); | 251 | SVC_SOCK_DEFAULTS); |
252 | if (error < 0) | 252 | if (error < 0) |
253 | lockd_down(); | 253 | lockd_down(); |
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index b86e3658a0af..61ad61743d94 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/nfsd/nfsd.h> | 15 | #include <linux/nfsd/nfsd.h> |
16 | #include <linux/nfsd/xdr.h> | 16 | #include <linux/nfsd/xdr.h> |
17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
18 | #include "auth.h" | ||
18 | 19 | ||
19 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 20 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
20 | 21 | ||
@@ -62,10 +63,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp) | |||
62 | * no slashes or null bytes. | 63 | * no slashes or null bytes. |
63 | */ | 64 | */ |
64 | static __be32 * | 65 | static __be32 * |
65 | decode_filename(__be32 *p, char **namp, int *lenp) | 66 | decode_filename(__be32 *p, char **namp, unsigned int *lenp) |
66 | { | 67 | { |
67 | char *name; | 68 | char *name; |
68 | int i; | 69 | unsigned int i; |
69 | 70 | ||
70 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { | 71 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { |
71 | for (i = 0, name = *namp; i < *lenp; i++, name++) { | 72 | for (i = 0, name = *namp; i < *lenp; i++, name++) { |
@@ -78,10 +79,10 @@ decode_filename(__be32 *p, char **namp, int *lenp) | |||
78 | } | 79 | } |
79 | 80 | ||
80 | static __be32 * | 81 | static __be32 * |
81 | decode_pathname(__be32 *p, char **namp, int *lenp) | 82 | decode_pathname(__be32 *p, char **namp, unsigned int *lenp) |
82 | { | 83 | { |
83 | char *name; | 84 | char *name; |
84 | int i; | 85 | unsigned int i; |
85 | 86 | ||
86 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { | 87 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { |
87 | for (i = 0, name = *namp; i < *lenp; i++, name++) { | 88 | for (i = 0, name = *namp; i < *lenp; i++, name++) { |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d0199189924c..cc75e4fcd02b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -132,7 +132,7 @@ out: | |||
132 | 132 | ||
133 | __be32 | 133 | __be32 |
134 | nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, | 134 | nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, |
135 | const char *name, int len, | 135 | const char *name, unsigned int len, |
136 | struct svc_export **exp_ret, struct dentry **dentry_ret) | 136 | struct svc_export **exp_ret, struct dentry **dentry_ret) |
137 | { | 137 | { |
138 | struct svc_export *exp; | 138 | struct svc_export *exp; |
@@ -226,7 +226,7 @@ out_nfserr: | |||
226 | */ | 226 | */ |
227 | __be32 | 227 | __be32 |
228 | nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | 228 | nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, |
229 | int len, struct svc_fh *resfh) | 229 | unsigned int len, struct svc_fh *resfh) |
230 | { | 230 | { |
231 | struct svc_export *exp; | 231 | struct svc_export *exp; |
232 | struct dentry *dentry; | 232 | struct dentry *dentry; |
@@ -1151,6 +1151,26 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1151 | } | 1151 | } |
1152 | #endif /* CONFIG_NFSD_V3 */ | 1152 | #endif /* CONFIG_NFSD_V3 */ |
1153 | 1153 | ||
1154 | __be32 | ||
1155 | nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, | ||
1156 | struct iattr *iap) | ||
1157 | { | ||
1158 | /* | ||
1159 | * Mode has already been set earlier in create: | ||
1160 | */ | ||
1161 | iap->ia_valid &= ~ATTR_MODE; | ||
1162 | /* | ||
1163 | * Setting uid/gid works only for root. Irix appears to | ||
1164 | * send along the gid on create when it tries to implement | ||
1165 | * setgid directories via NFS: | ||
1166 | */ | ||
1167 | if (current->fsuid != 0) | ||
1168 | iap->ia_valid &= ~(ATTR_UID|ATTR_GID); | ||
1169 | if (iap->ia_valid) | ||
1170 | return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); | ||
1171 | return 0; | ||
1172 | } | ||
1173 | |||
1154 | /* | 1174 | /* |
1155 | * Create a file (regular, directory, device, fifo); UNIX sockets | 1175 | * Create a file (regular, directory, device, fifo); UNIX sockets |
1156 | * not yet implemented. | 1176 | * not yet implemented. |
@@ -1167,6 +1187,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1167 | struct dentry *dentry, *dchild = NULL; | 1187 | struct dentry *dentry, *dchild = NULL; |
1168 | struct inode *dirp; | 1188 | struct inode *dirp; |
1169 | __be32 err; | 1189 | __be32 err; |
1190 | __be32 err2; | ||
1170 | int host_err; | 1191 | int host_err; |
1171 | 1192 | ||
1172 | err = nfserr_perm; | 1193 | err = nfserr_perm; |
@@ -1257,16 +1278,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1257 | } | 1278 | } |
1258 | 1279 | ||
1259 | 1280 | ||
1260 | /* Set file attributes. Mode has already been set and | 1281 | err2 = nfsd_create_setattr(rqstp, resfhp, iap); |
1261 | * setting uid/gid works only for root. Irix appears to | 1282 | if (err2) |
1262 | * send along the gid when it tries to implement setgid | 1283 | err = err2; |
1263 | * directories via NFS. | ||
1264 | */ | ||
1265 | if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { | ||
1266 | __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); | ||
1267 | if (err2) | ||
1268 | err = err2; | ||
1269 | } | ||
1270 | /* | 1284 | /* |
1271 | * Update the file handle to get the new inode info. | 1285 | * Update the file handle to get the new inode info. |
1272 | */ | 1286 | */ |
@@ -1295,6 +1309,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1295 | struct dentry *dentry, *dchild = NULL; | 1309 | struct dentry *dentry, *dchild = NULL; |
1296 | struct inode *dirp; | 1310 | struct inode *dirp; |
1297 | __be32 err; | 1311 | __be32 err; |
1312 | __be32 err2; | ||
1298 | int host_err; | 1313 | int host_err; |
1299 | __u32 v_mtime=0, v_atime=0; | 1314 | __u32 v_mtime=0, v_atime=0; |
1300 | 1315 | ||
@@ -1399,16 +1414,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
1399 | iap->ia_atime.tv_nsec = 0; | 1414 | iap->ia_atime.tv_nsec = 0; |
1400 | } | 1415 | } |
1401 | 1416 | ||
1402 | /* Set file attributes. | ||
1403 | * Irix appears to send along the gid when it tries to | ||
1404 | * implement setgid directories via NFS. Clear out all that cruft. | ||
1405 | */ | ||
1406 | set_attr: | 1417 | set_attr: |
1407 | if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { | 1418 | err2 = nfsd_create_setattr(rqstp, resfhp, iap); |
1408 | __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); | 1419 | if (err2) |
1409 | if (err2) | 1420 | err = err2; |
1410 | err = err2; | ||
1411 | } | ||
1412 | 1421 | ||
1413 | /* | 1422 | /* |
1414 | * Update the filehandle to get the new inode info. | 1423 | * Update the filehandle to get the new inode info. |
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index e2d1ce36b367..4babb2a129ac 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h | |||
@@ -173,14 +173,17 @@ void nlmclnt_next_cookie(struct nlm_cookie *); | |||
173 | /* | 173 | /* |
174 | * Host cache | 174 | * Host cache |
175 | */ | 175 | */ |
176 | struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *, int, int, const char *, int); | 176 | struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *, int, int, |
177 | struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *, const char *, int); | 177 | const char *, unsigned int); |
178 | struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *, | ||
179 | unsigned int); | ||
178 | struct rpc_clnt * nlm_bind_host(struct nlm_host *); | 180 | struct rpc_clnt * nlm_bind_host(struct nlm_host *); |
179 | void nlm_rebind_host(struct nlm_host *); | 181 | void nlm_rebind_host(struct nlm_host *); |
180 | struct nlm_host * nlm_get_host(struct nlm_host *); | 182 | struct nlm_host * nlm_get_host(struct nlm_host *); |
181 | void nlm_release_host(struct nlm_host *); | 183 | void nlm_release_host(struct nlm_host *); |
182 | void nlm_shutdown_hosts(void); | 184 | void nlm_shutdown_hosts(void); |
183 | extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int, u32); | 185 | extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, |
186 | unsigned int, u32); | ||
184 | void nsm_release(struct nsm_handle *); | 187 | void nsm_release(struct nsm_handle *); |
185 | 188 | ||
186 | 189 | ||
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 83a1f9f6237b..df18fa053bcd 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h | |||
@@ -29,7 +29,7 @@ struct svc_rqst; | |||
29 | /* Lock info passed via NLM */ | 29 | /* Lock info passed via NLM */ |
30 | struct nlm_lock { | 30 | struct nlm_lock { |
31 | char * caller; | 31 | char * caller; |
32 | int len; /* length of "caller" */ | 32 | unsigned int len; /* length of "caller" */ |
33 | struct nfs_fh fh; | 33 | struct nfs_fh fh; |
34 | struct xdr_netobj oh; | 34 | struct xdr_netobj oh; |
35 | u32 svid; | 35 | u32 svid; |
@@ -78,7 +78,7 @@ struct nlm_res { | |||
78 | */ | 78 | */ |
79 | struct nlm_reboot { | 79 | struct nlm_reboot { |
80 | char * mon; | 80 | char * mon; |
81 | int len; | 81 | unsigned int len; |
82 | u32 state; | 82 | u32 state; |
83 | __be32 addr; | 83 | __be32 addr; |
84 | __be32 vers; | 84 | __be32 vers; |
diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild index d9c5455808e5..e726fc3a4375 100644 --- a/include/linux/nfsd/Kbuild +++ b/include/linux/nfsd/Kbuild | |||
@@ -4,4 +4,3 @@ unifdef-y += stats.h | |||
4 | unifdef-y += syscall.h | 4 | unifdef-y += syscall.h |
5 | unifdef-y += nfsfh.h | 5 | unifdef-y += nfsfh.h |
6 | unifdef-y += debug.h | 6 | unifdef-y += debug.h |
7 | unifdef-y += auth.h | ||
diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index 007480cd6a60..7b5d784cc858 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h | |||
@@ -72,8 +72,8 @@ enum { | |||
72 | */ | 72 | */ |
73 | #define RC_DELAY (HZ/5) | 73 | #define RC_DELAY (HZ/5) |
74 | 74 | ||
75 | void nfsd_cache_init(void); | 75 | int nfsd_reply_cache_init(void); |
76 | void nfsd_cache_shutdown(void); | 76 | void nfsd_reply_cache_shutdown(void); |
77 | int nfsd_cache_lookup(struct svc_rqst *, int); | 77 | int nfsd_cache_lookup(struct svc_rqst *, int); |
78 | void nfsd_cache_update(struct svc_rqst *, int, __be32 *); | 78 | void nfsd_cache_update(struct svc_rqst *, int, __be32 *); |
79 | 79 | ||
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index bcb7abafbca9..3a1687251367 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h | |||
@@ -122,7 +122,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); | |||
122 | /* | 122 | /* |
123 | * Function declarations | 123 | * Function declarations |
124 | */ | 124 | */ |
125 | void nfsd_export_init(void); | 125 | int nfsd_export_init(void); |
126 | void nfsd_export_shutdown(void); | 126 | void nfsd_export_shutdown(void); |
127 | void nfsd_export_flush(void); | 127 | void nfsd_export_flush(void); |
128 | void exp_readlock(void); | 128 | void exp_readlock(void); |
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 604a0d786bc6..8caf4c4f64e6 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/nfsd/debug.h> | 20 | #include <linux/nfsd/debug.h> |
21 | #include <linux/nfsd/nfsfh.h> | 21 | #include <linux/nfsd/nfsfh.h> |
22 | #include <linux/nfsd/export.h> | 22 | #include <linux/nfsd/export.h> |
23 | #include <linux/nfsd/auth.h> | ||
24 | #include <linux/nfsd/stats.h> | 23 | #include <linux/nfsd/stats.h> |
25 | /* | 24 | /* |
26 | * nfsd version | 25 | * nfsd version |
@@ -70,9 +69,9 @@ void nfsd_racache_shutdown(void); | |||
70 | int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | 69 | int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, |
71 | struct svc_export **expp); | 70 | struct svc_export **expp); |
72 | __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, | 71 | __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, |
73 | const char *, int, struct svc_fh *); | 72 | const char *, unsigned int, struct svc_fh *); |
74 | __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, | 73 | __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, |
75 | const char *, int, | 74 | const char *, unsigned int, |
76 | struct svc_export **, struct dentry **); | 75 | struct svc_export **, struct dentry **); |
77 | __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, | 76 | __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, |
78 | struct iattr *, int, time_t); | 77 | struct iattr *, int, time_t); |
diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h index 8bcddccb6c42..4e439765b705 100644 --- a/include/linux/nfsd/syscall.h +++ b/include/linux/nfsd/syscall.h | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <linux/nfsd/const.h> | 18 | #include <linux/nfsd/const.h> |
19 | #include <linux/nfsd/export.h> | 19 | #include <linux/nfsd/export.h> |
20 | #include <linux/nfsd/nfsfh.h> | 20 | #include <linux/nfsd/nfsfh.h> |
21 | #include <linux/nfsd/auth.h> | ||
22 | 21 | ||
23 | /* | 22 | /* |
24 | * Version of the syscall interface | 23 | * Version of the syscall interface |
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h index 67885d5e6e50..a0132ef58f21 100644 --- a/include/linux/nfsd/xdr.h +++ b/include/linux/nfsd/xdr.h | |||
@@ -23,7 +23,7 @@ struct nfsd_sattrargs { | |||
23 | struct nfsd_diropargs { | 23 | struct nfsd_diropargs { |
24 | struct svc_fh fh; | 24 | struct svc_fh fh; |
25 | char * name; | 25 | char * name; |
26 | int len; | 26 | unsigned int len; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | struct nfsd_readargs { | 29 | struct nfsd_readargs { |
@@ -43,17 +43,17 @@ struct nfsd_writeargs { | |||
43 | struct nfsd_createargs { | 43 | struct nfsd_createargs { |
44 | struct svc_fh fh; | 44 | struct svc_fh fh; |
45 | char * name; | 45 | char * name; |
46 | int len; | 46 | unsigned int len; |
47 | struct iattr attrs; | 47 | struct iattr attrs; |
48 | }; | 48 | }; |
49 | 49 | ||
50 | struct nfsd_renameargs { | 50 | struct nfsd_renameargs { |
51 | struct svc_fh ffh; | 51 | struct svc_fh ffh; |
52 | char * fname; | 52 | char * fname; |
53 | int flen; | 53 | unsigned int flen; |
54 | struct svc_fh tfh; | 54 | struct svc_fh tfh; |
55 | char * tname; | 55 | char * tname; |
56 | int tlen; | 56 | unsigned int tlen; |
57 | }; | 57 | }; |
58 | 58 | ||
59 | struct nfsd_readlinkargs { | 59 | struct nfsd_readlinkargs { |
@@ -65,15 +65,15 @@ struct nfsd_linkargs { | |||
65 | struct svc_fh ffh; | 65 | struct svc_fh ffh; |
66 | struct svc_fh tfh; | 66 | struct svc_fh tfh; |
67 | char * tname; | 67 | char * tname; |
68 | int tlen; | 68 | unsigned int tlen; |
69 | }; | 69 | }; |
70 | 70 | ||
71 | struct nfsd_symlinkargs { | 71 | struct nfsd_symlinkargs { |
72 | struct svc_fh ffh; | 72 | struct svc_fh ffh; |
73 | char * fname; | 73 | char * fname; |
74 | int flen; | 74 | unsigned int flen; |
75 | char * tname; | 75 | char * tname; |
76 | int tlen; | 76 | unsigned int tlen; |
77 | struct iattr attrs; | 77 | struct iattr attrs; |
78 | }; | 78 | }; |
79 | 79 | ||
diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h index 89d9d6061a62..421eddd65a25 100644 --- a/include/linux/nfsd/xdr3.h +++ b/include/linux/nfsd/xdr3.h | |||
@@ -21,7 +21,7 @@ struct nfsd3_sattrargs { | |||
21 | struct nfsd3_diropargs { | 21 | struct nfsd3_diropargs { |
22 | struct svc_fh fh; | 22 | struct svc_fh fh; |
23 | char * name; | 23 | char * name; |
24 | int len; | 24 | unsigned int len; |
25 | }; | 25 | }; |
26 | 26 | ||
27 | struct nfsd3_accessargs { | 27 | struct nfsd3_accessargs { |
@@ -48,7 +48,7 @@ struct nfsd3_writeargs { | |||
48 | struct nfsd3_createargs { | 48 | struct nfsd3_createargs { |
49 | struct svc_fh fh; | 49 | struct svc_fh fh; |
50 | char * name; | 50 | char * name; |
51 | int len; | 51 | unsigned int len; |
52 | int createmode; | 52 | int createmode; |
53 | struct iattr attrs; | 53 | struct iattr attrs; |
54 | __be32 * verf; | 54 | __be32 * verf; |
@@ -57,7 +57,7 @@ struct nfsd3_createargs { | |||
57 | struct nfsd3_mknodargs { | 57 | struct nfsd3_mknodargs { |
58 | struct svc_fh fh; | 58 | struct svc_fh fh; |
59 | char * name; | 59 | char * name; |
60 | int len; | 60 | unsigned int len; |
61 | __u32 ftype; | 61 | __u32 ftype; |
62 | __u32 major, minor; | 62 | __u32 major, minor; |
63 | struct iattr attrs; | 63 | struct iattr attrs; |
@@ -66,10 +66,10 @@ struct nfsd3_mknodargs { | |||
66 | struct nfsd3_renameargs { | 66 | struct nfsd3_renameargs { |
67 | struct svc_fh ffh; | 67 | struct svc_fh ffh; |
68 | char * fname; | 68 | char * fname; |
69 | int flen; | 69 | unsigned int flen; |
70 | struct svc_fh tfh; | 70 | struct svc_fh tfh; |
71 | char * tname; | 71 | char * tname; |
72 | int tlen; | 72 | unsigned int tlen; |
73 | }; | 73 | }; |
74 | 74 | ||
75 | struct nfsd3_readlinkargs { | 75 | struct nfsd3_readlinkargs { |
@@ -81,15 +81,15 @@ struct nfsd3_linkargs { | |||
81 | struct svc_fh ffh; | 81 | struct svc_fh ffh; |
82 | struct svc_fh tfh; | 82 | struct svc_fh tfh; |
83 | char * tname; | 83 | char * tname; |
84 | int tlen; | 84 | unsigned int tlen; |
85 | }; | 85 | }; |
86 | 86 | ||
87 | struct nfsd3_symlinkargs { | 87 | struct nfsd3_symlinkargs { |
88 | struct svc_fh ffh; | 88 | struct svc_fh ffh; |
89 | char * fname; | 89 | char * fname; |
90 | int flen; | 90 | unsigned int flen; |
91 | char * tname; | 91 | char * tname; |
92 | int tlen; | 92 | unsigned int tlen; |
93 | struct iattr attrs; | 93 | struct iattr attrs; |
94 | }; | 94 | }; |
95 | 95 | ||
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index b0ddfb41c790..27bd3e38ec5a 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h | |||
@@ -441,7 +441,7 @@ void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); | |||
441 | void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); | 441 | void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); |
442 | __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | 442 | __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, |
443 | struct dentry *dentry, __be32 *buffer, int *countp, | 443 | struct dentry *dentry, __be32 *buffer, int *countp, |
444 | u32 *bmval, struct svc_rqst *); | 444 | u32 *bmval, struct svc_rqst *, int ignore_crossmnt); |
445 | extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, | 445 | extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, |
446 | struct nfsd4_compound_state *, | 446 | struct nfsd4_compound_state *, |
447 | struct nfsd4_setclientid *setclid); | 447 | struct nfsd4_setclientid *setclid); |
diff --git a/include/linux/nfsd_idmap.h b/include/linux/nfsd_idmap.h index e82746fcad14..d4a2ac18bd4c 100644 --- a/include/linux/nfsd_idmap.h +++ b/include/linux/nfsd_idmap.h | |||
@@ -44,11 +44,16 @@ | |||
44 | #define IDMAP_NAMESZ 128 | 44 | #define IDMAP_NAMESZ 128 |
45 | 45 | ||
46 | #ifdef CONFIG_NFSD_V4 | 46 | #ifdef CONFIG_NFSD_V4 |
47 | void nfsd_idmap_init(void); | 47 | int nfsd_idmap_init(void); |
48 | void nfsd_idmap_shutdown(void); | 48 | void nfsd_idmap_shutdown(void); |
49 | #else | 49 | #else |
50 | static inline void nfsd_idmap_init(void) {}; | 50 | static inline int nfsd_idmap_init(void) |
51 | static inline void nfsd_idmap_shutdown(void) {}; | 51 | { |
52 | return 0; | ||
53 | } | ||
54 | static inline void nfsd_idmap_shutdown(void) | ||
55 | { | ||
56 | } | ||
52 | #endif | 57 | #endif |
53 | 58 | ||
54 | int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); | 59 | int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); |
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index bd7a6b0a87af..03547d6abee5 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h | |||
@@ -169,8 +169,8 @@ extern int cache_check(struct cache_detail *detail, | |||
169 | extern void cache_flush(void); | 169 | extern void cache_flush(void); |
170 | extern void cache_purge(struct cache_detail *detail); | 170 | extern void cache_purge(struct cache_detail *detail); |
171 | #define NEVER (0x7FFFFFFF) | 171 | #define NEVER (0x7FFFFFFF) |
172 | extern void cache_register(struct cache_detail *cd); | 172 | extern int cache_register(struct cache_detail *cd); |
173 | extern int cache_unregister(struct cache_detail *cd); | 173 | extern void cache_unregister(struct cache_detail *cd); |
174 | 174 | ||
175 | extern void qword_add(char **bpp, int *lp, char *str); | 175 | extern void qword_add(char **bpp, int *lp, char *str); |
176 | extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); | 176 | extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); |
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 3912cf16361e..10709cbe96fd 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h | |||
@@ -20,7 +20,7 @@ | |||
20 | #define RPCDBG_BIND 0x0020 | 20 | #define RPCDBG_BIND 0x0020 |
21 | #define RPCDBG_SCHED 0x0040 | 21 | #define RPCDBG_SCHED 0x0040 |
22 | #define RPCDBG_TRANS 0x0080 | 22 | #define RPCDBG_TRANS 0x0080 |
23 | #define RPCDBG_SVCSOCK 0x0100 | 23 | #define RPCDBG_SVCXPRT 0x0100 |
24 | #define RPCDBG_SVCDSP 0x0200 | 24 | #define RPCDBG_SVCDSP 0x0200 |
25 | #define RPCDBG_MISC 0x0400 | 25 | #define RPCDBG_MISC 0x0400 |
26 | #define RPCDBG_CACHE 0x0800 | 26 | #define RPCDBG_CACHE 0x0800 |
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 8531a70da73d..64c771056187 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h | |||
@@ -204,7 +204,7 @@ union svc_addr_u { | |||
204 | struct svc_rqst { | 204 | struct svc_rqst { |
205 | struct list_head rq_list; /* idle list */ | 205 | struct list_head rq_list; /* idle list */ |
206 | struct list_head rq_all; /* all threads list */ | 206 | struct list_head rq_all; /* all threads list */ |
207 | struct svc_sock * rq_sock; /* socket */ | 207 | struct svc_xprt * rq_xprt; /* transport ptr */ |
208 | struct sockaddr_storage rq_addr; /* peer address */ | 208 | struct sockaddr_storage rq_addr; /* peer address */ |
209 | size_t rq_addrlen; | 209 | size_t rq_addrlen; |
210 | 210 | ||
@@ -214,9 +214,10 @@ struct svc_rqst { | |||
214 | struct auth_ops * rq_authop; /* authentication flavour */ | 214 | struct auth_ops * rq_authop; /* authentication flavour */ |
215 | u32 rq_flavor; /* pseudoflavor */ | 215 | u32 rq_flavor; /* pseudoflavor */ |
216 | struct svc_cred rq_cred; /* auth info */ | 216 | struct svc_cred rq_cred; /* auth info */ |
217 | struct sk_buff * rq_skbuff; /* fast recv inet buffer */ | 217 | void * rq_xprt_ctxt; /* transport specific context ptr */ |
218 | struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ | 218 | struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ |
219 | 219 | ||
220 | size_t rq_xprt_hlen; /* xprt header len */ | ||
220 | struct xdr_buf rq_arg; | 221 | struct xdr_buf rq_arg; |
221 | struct xdr_buf rq_res; | 222 | struct xdr_buf rq_res; |
222 | struct page * rq_pages[RPCSVC_MAXPAGES]; | 223 | struct page * rq_pages[RPCSVC_MAXPAGES]; |
@@ -317,11 +318,12 @@ static inline void svc_free_res_pages(struct svc_rqst *rqstp) | |||
317 | 318 | ||
318 | struct svc_deferred_req { | 319 | struct svc_deferred_req { |
319 | u32 prot; /* protocol (UDP or TCP) */ | 320 | u32 prot; /* protocol (UDP or TCP) */ |
320 | struct svc_sock *svsk; | 321 | struct svc_xprt *xprt; |
321 | struct sockaddr_storage addr; /* where reply must go */ | 322 | struct sockaddr_storage addr; /* where reply must go */ |
322 | size_t addrlen; | 323 | size_t addrlen; |
323 | union svc_addr_u daddr; /* where reply must come from */ | 324 | union svc_addr_u daddr; /* where reply must come from */ |
324 | struct cache_deferred_req handle; | 325 | struct cache_deferred_req handle; |
326 | size_t xprt_hlen; | ||
325 | int argslen; | 327 | int argslen; |
326 | __be32 args[0]; | 328 | __be32 args[0]; |
327 | }; | 329 | }; |
@@ -382,6 +384,8 @@ struct svc_procedure { | |||
382 | */ | 384 | */ |
383 | struct svc_serv * svc_create(struct svc_program *, unsigned int, | 385 | struct svc_serv * svc_create(struct svc_program *, unsigned int, |
384 | void (*shutdown)(struct svc_serv*)); | 386 | void (*shutdown)(struct svc_serv*)); |
387 | struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, | ||
388 | struct svc_pool *pool); | ||
385 | int svc_create_thread(svc_thread_fn, struct svc_serv *); | 389 | int svc_create_thread(svc_thread_fn, struct svc_serv *); |
386 | void svc_exit_thread(struct svc_rqst *); | 390 | void svc_exit_thread(struct svc_rqst *); |
387 | struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, | 391 | struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, |
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h new file mode 100644 index 000000000000..c11bbcc081f9 --- /dev/null +++ b/include/linux/sunrpc/svc_rdma.h | |||
@@ -0,0 +1,262 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #ifndef SVC_RDMA_H | ||
43 | #define SVC_RDMA_H | ||
44 | #include <linux/sunrpc/xdr.h> | ||
45 | #include <linux/sunrpc/svcsock.h> | ||
46 | #include <linux/sunrpc/rpc_rdma.h> | ||
47 | #include <rdma/ib_verbs.h> | ||
48 | #include <rdma/rdma_cm.h> | ||
49 | #define SVCRDMA_DEBUG | ||
50 | |||
51 | /* RPC/RDMA parameters and stats */ | ||
52 | extern unsigned int svcrdma_ord; | ||
53 | extern unsigned int svcrdma_max_requests; | ||
54 | extern unsigned int svcrdma_max_req_size; | ||
55 | |||
56 | extern atomic_t rdma_stat_recv; | ||
57 | extern atomic_t rdma_stat_read; | ||
58 | extern atomic_t rdma_stat_write; | ||
59 | extern atomic_t rdma_stat_sq_starve; | ||
60 | extern atomic_t rdma_stat_rq_starve; | ||
61 | extern atomic_t rdma_stat_rq_poll; | ||
62 | extern atomic_t rdma_stat_rq_prod; | ||
63 | extern atomic_t rdma_stat_sq_poll; | ||
64 | extern atomic_t rdma_stat_sq_prod; | ||
65 | |||
66 | #define RPCRDMA_VERSION 1 | ||
67 | |||
68 | /* | ||
69 | * Contexts are built when an RDMA request is created and are a | ||
70 | * record of the resources that can be recovered when the request | ||
71 | * completes. | ||
72 | */ | ||
73 | struct svc_rdma_op_ctxt { | ||
74 | struct svc_rdma_op_ctxt *next; | ||
75 | struct xdr_buf arg; | ||
76 | struct list_head dto_q; | ||
77 | enum ib_wr_opcode wr_op; | ||
78 | enum ib_wc_status wc_status; | ||
79 | u32 byte_len; | ||
80 | struct svcxprt_rdma *xprt; | ||
81 | unsigned long flags; | ||
82 | enum dma_data_direction direction; | ||
83 | int count; | ||
84 | struct ib_sge sge[RPCSVC_MAXPAGES]; | ||
85 | struct page *pages[RPCSVC_MAXPAGES]; | ||
86 | }; | ||
87 | |||
88 | #define RDMACTXT_F_READ_DONE 1 | ||
89 | #define RDMACTXT_F_LAST_CTXT 2 | ||
90 | |||
91 | struct svcxprt_rdma { | ||
92 | struct svc_xprt sc_xprt; /* SVC transport structure */ | ||
93 | struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ | ||
94 | struct list_head sc_accept_q; /* Conn. waiting accept */ | ||
95 | int sc_ord; /* RDMA read limit */ | ||
96 | wait_queue_head_t sc_read_wait; | ||
97 | int sc_max_sge; | ||
98 | |||
99 | int sc_sq_depth; /* Depth of SQ */ | ||
100 | atomic_t sc_sq_count; /* Number of SQ WR on queue */ | ||
101 | |||
102 | int sc_max_requests; /* Depth of RQ */ | ||
103 | int sc_max_req_size; /* Size of each RQ WR buf */ | ||
104 | |||
105 | struct ib_pd *sc_pd; | ||
106 | |||
107 | struct svc_rdma_op_ctxt *sc_ctxt_head; | ||
108 | int sc_ctxt_cnt; | ||
109 | int sc_ctxt_bump; | ||
110 | int sc_ctxt_max; | ||
111 | spinlock_t sc_ctxt_lock; | ||
112 | struct list_head sc_rq_dto_q; | ||
113 | spinlock_t sc_rq_dto_lock; | ||
114 | struct ib_qp *sc_qp; | ||
115 | struct ib_cq *sc_rq_cq; | ||
116 | struct ib_cq *sc_sq_cq; | ||
117 | struct ib_mr *sc_phys_mr; /* MR for server memory */ | ||
118 | |||
119 | spinlock_t sc_lock; /* transport lock */ | ||
120 | |||
121 | wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ | ||
122 | unsigned long sc_flags; | ||
123 | struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ | ||
124 | struct list_head sc_read_complete_q; | ||
125 | spinlock_t sc_read_complete_lock; | ||
126 | }; | ||
127 | /* sc_flags */ | ||
128 | #define RDMAXPRT_RQ_PENDING 1 | ||
129 | #define RDMAXPRT_SQ_PENDING 2 | ||
130 | #define RDMAXPRT_CONN_PENDING 3 | ||
131 | |||
132 | #define RPCRDMA_LISTEN_BACKLOG 10 | ||
133 | /* The default ORD value is based on two outstanding full-size writes with a | ||
134 | * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ | ||
135 | #define RPCRDMA_ORD (64/4) | ||
136 | #define RPCRDMA_SQ_DEPTH_MULT 8 | ||
137 | #define RPCRDMA_MAX_THREADS 16 | ||
138 | #define RPCRDMA_MAX_REQUESTS 16 | ||
139 | #define RPCRDMA_MAX_REQ_SIZE 4096 | ||
140 | |||
141 | /* svc_rdma_marshal.c */ | ||
142 | extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, | ||
143 | int *, int *); | ||
144 | extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); | ||
145 | extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); | ||
146 | extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, | ||
147 | struct rpcrdma_msg *, | ||
148 | enum rpcrdma_errcode, u32 *); | ||
149 | extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); | ||
150 | extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); | ||
151 | extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, | ||
152 | u32, u64, u32); | ||
153 | extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, | ||
154 | struct rpcrdma_msg *, | ||
155 | struct rpcrdma_msg *, | ||
156 | enum rpcrdma_proc); | ||
157 | extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); | ||
158 | |||
159 | /* svc_rdma_recvfrom.c */ | ||
160 | extern int svc_rdma_recvfrom(struct svc_rqst *); | ||
161 | |||
162 | /* svc_rdma_sendto.c */ | ||
163 | extern int svc_rdma_sendto(struct svc_rqst *); | ||
164 | |||
165 | /* svc_rdma_transport.c */ | ||
166 | extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); | ||
167 | extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, | ||
168 | enum rpcrdma_errcode); | ||
169 | struct page *svc_rdma_get_page(void); | ||
170 | extern int svc_rdma_post_recv(struct svcxprt_rdma *); | ||
171 | extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); | ||
172 | extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); | ||
173 | extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); | ||
174 | extern void svc_sq_reap(struct svcxprt_rdma *); | ||
175 | extern void svc_rq_reap(struct svcxprt_rdma *); | ||
176 | extern struct svc_xprt_class svc_rdma_class; | ||
177 | extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); | ||
178 | |||
179 | /* svc_rdma.c */ | ||
180 | extern int svc_rdma_init(void); | ||
181 | extern void svc_rdma_cleanup(void); | ||
182 | |||
183 | /* | ||
184 | * Returns the address of the first read chunk or NULL if no read chunk is | |
185 | * present | ||
186 | */ | ||
187 | static inline struct rpcrdma_read_chunk * | ||
188 | svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) | ||
189 | { | ||
190 | struct rpcrdma_read_chunk *ch = | ||
191 | (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
192 | |||
193 | if (ch->rc_discrim == 0) | ||
194 | return NULL; | ||
195 | |||
196 | return ch; | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * Returns the address of the first write array element or NULL if no | |
201 | * write array list is present | ||
202 | */ | ||
203 | static inline struct rpcrdma_write_array * | ||
204 | svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) | ||
205 | { | ||
206 | if (rmsgp->rm_body.rm_chunks[0] != 0 | ||
207 | || rmsgp->rm_body.rm_chunks[1] == 0) | ||
208 | return NULL; | ||
209 | |||
210 | return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * Returns the address of the first reply array element or NULL if no | |
215 | * reply array is present | ||
216 | */ | ||
217 | static inline struct rpcrdma_write_array * | ||
218 | svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) | ||
219 | { | ||
220 | struct rpcrdma_read_chunk *rch; | ||
221 | struct rpcrdma_write_array *wr_ary; | ||
222 | struct rpcrdma_write_array *rp_ary; | ||
223 | |||
224 | /* XXX: Need to fix when reply list may occur with read-list and/or | ||
225 | * write list */ | ||
226 | if (rmsgp->rm_body.rm_chunks[0] != 0 || | ||
227 | rmsgp->rm_body.rm_chunks[1] != 0) | ||
228 | return NULL; | ||
229 | |||
230 | rch = svc_rdma_get_read_chunk(rmsgp); | ||
231 | if (rch) { | ||
232 | while (rch->rc_discrim) | ||
233 | rch++; | ||
234 | |||
235 | /* The reply list follows an empty write array located | ||
236 | * at 'rc_position' here. The reply array is at rc_target. | ||
237 | */ | ||
238 | rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; | ||
239 | |||
240 | goto found_it; | ||
241 | } | ||
242 | |||
243 | wr_ary = svc_rdma_get_write_array(rmsgp); | ||
244 | if (wr_ary) { | ||
245 | rp_ary = (struct rpcrdma_write_array *) | ||
246 | &wr_ary-> | ||
247 | wc_array[wr_ary->wc_nchunks].wc_target.rs_length; | ||
248 | |||
249 | goto found_it; | ||
250 | } | ||
251 | |||
252 | /* No read list, no write list */ | ||
253 | rp_ary = (struct rpcrdma_write_array *) | ||
254 | &rmsgp->rm_body.rm_chunks[2]; | ||
255 | |||
256 | found_it: | ||
257 | if (rp_ary->wc_discrim == 0) | ||
258 | return NULL; | ||
259 | |||
260 | return rp_ary; | ||
261 | } | ||
262 | #endif | ||
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h new file mode 100644 index 000000000000..6fd7b016517f --- /dev/null +++ b/include/linux/sunrpc/svc_xprt.h | |||
@@ -0,0 +1,159 @@ | |||
1 | /* | ||
2 | * linux/include/linux/sunrpc/svc_xprt.h | ||
3 | * | ||
4 | * RPC server transport I/O | ||
5 | */ | ||
6 | |||
7 | #ifndef SUNRPC_SVC_XPRT_H | ||
8 | #define SUNRPC_SVC_XPRT_H | ||
9 | |||
10 | #include <linux/sunrpc/svc.h> | ||
11 | #include <linux/module.h> | ||
12 | |||
13 | struct svc_xprt_ops { | ||
14 | struct svc_xprt *(*xpo_create)(struct svc_serv *, | ||
15 | struct sockaddr *, int, | ||
16 | int); | ||
17 | struct svc_xprt *(*xpo_accept)(struct svc_xprt *); | ||
18 | int (*xpo_has_wspace)(struct svc_xprt *); | ||
19 | int (*xpo_recvfrom)(struct svc_rqst *); | ||
20 | void (*xpo_prep_reply_hdr)(struct svc_rqst *); | ||
21 | int (*xpo_sendto)(struct svc_rqst *); | ||
22 | void (*xpo_release_rqst)(struct svc_rqst *); | ||
23 | void (*xpo_detach)(struct svc_xprt *); | ||
24 | void (*xpo_free)(struct svc_xprt *); | ||
25 | }; | ||
26 | |||
27 | struct svc_xprt_class { | ||
28 | const char *xcl_name; | ||
29 | struct module *xcl_owner; | ||
30 | struct svc_xprt_ops *xcl_ops; | ||
31 | struct list_head xcl_list; | ||
32 | u32 xcl_max_payload; | ||
33 | }; | ||
34 | |||
35 | struct svc_xprt { | ||
36 | struct svc_xprt_class *xpt_class; | ||
37 | struct svc_xprt_ops *xpt_ops; | ||
38 | struct kref xpt_ref; | ||
39 | struct list_head xpt_list; | ||
40 | struct list_head xpt_ready; | ||
41 | unsigned long xpt_flags; | ||
42 | #define XPT_BUSY 0 /* enqueued/receiving */ | ||
43 | #define XPT_CONN 1 /* conn pending */ | ||
44 | #define XPT_CLOSE 2 /* dead or dying */ | ||
45 | #define XPT_DATA 3 /* data pending */ | ||
46 | #define XPT_TEMP 4 /* connected transport */ | ||
47 | #define XPT_DEAD 6 /* transport closed */ | ||
48 | #define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */ | ||
49 | #define XPT_DEFERRED 8 /* deferred request pending */ | ||
50 | #define XPT_OLD 9 /* used for xprt aging mark+sweep */ | ||
51 | #define XPT_DETACHED 10 /* detached from tempsocks list */ | ||
52 | #define XPT_LISTENER 11 /* listening endpoint */ | ||
53 | #define XPT_CACHE_AUTH 12 /* cache auth info */ | ||
54 | |||
55 | struct svc_pool *xpt_pool; /* current pool iff queued */ | ||
56 | struct svc_serv *xpt_server; /* service for transport */ | ||
57 | atomic_t xpt_reserved; /* space on outq that is rsvd */ | ||
58 | struct mutex xpt_mutex; /* to serialize sending data */ | ||
59 | spinlock_t xpt_lock; /* protects xpt_deferred | |
60 | * and xpt_auth_cache */ | ||
61 | void *xpt_auth_cache;/* auth cache */ | ||
62 | struct list_head xpt_deferred; /* deferred requests that need | ||
63 | * to be revisited */ | |
64 | struct sockaddr_storage xpt_local; /* local address */ | ||
65 | size_t xpt_locallen; /* length of address */ | ||
66 | struct sockaddr_storage xpt_remote; /* remote peer's address */ | ||
67 | size_t xpt_remotelen; /* length of address */ | ||
68 | }; | ||
69 | |||
70 | int svc_reg_xprt_class(struct svc_xprt_class *); | ||
71 | void svc_unreg_xprt_class(struct svc_xprt_class *); | ||
72 | void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, | ||
73 | struct svc_serv *); | ||
74 | int svc_create_xprt(struct svc_serv *, char *, unsigned short, int); | ||
75 | void svc_xprt_enqueue(struct svc_xprt *xprt); | ||
76 | void svc_xprt_received(struct svc_xprt *); | ||
77 | void svc_xprt_put(struct svc_xprt *xprt); | ||
78 | void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); | ||
79 | void svc_close_xprt(struct svc_xprt *xprt); | ||
80 | void svc_delete_xprt(struct svc_xprt *xprt); | ||
81 | int svc_port_is_privileged(struct sockaddr *sin); | ||
82 | int svc_print_xprts(char *buf, int maxlen); | ||
83 | struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int); | ||
84 | int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen); | ||
85 | |||
86 | static inline void svc_xprt_get(struct svc_xprt *xprt) | ||
87 | { | ||
88 | kref_get(&xprt->xpt_ref); | ||
89 | } | ||
90 | static inline void svc_xprt_set_local(struct svc_xprt *xprt, | ||
91 | struct sockaddr *sa, int salen) | ||
92 | { | ||
93 | memcpy(&xprt->xpt_local, sa, salen); | ||
94 | xprt->xpt_locallen = salen; | ||
95 | } | ||
96 | static inline void svc_xprt_set_remote(struct svc_xprt *xprt, | ||
97 | struct sockaddr *sa, int salen) | ||
98 | { | ||
99 | memcpy(&xprt->xpt_remote, sa, salen); | ||
100 | xprt->xpt_remotelen = salen; | ||
101 | } | ||
102 | static inline unsigned short svc_addr_port(struct sockaddr *sa) | ||
103 | { | ||
104 | unsigned short ret = 0; | ||
105 | switch (sa->sa_family) { | ||
106 | case AF_INET: | ||
107 | ret = ntohs(((struct sockaddr_in *)sa)->sin_port); | ||
108 | break; | ||
109 | case AF_INET6: | ||
110 | ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); | ||
111 | break; | ||
112 | } | ||
113 | return ret; | ||
114 | } | ||
115 | |||
116 | static inline size_t svc_addr_len(struct sockaddr *sa) | ||
117 | { | ||
118 | switch (sa->sa_family) { | ||
119 | case AF_INET: | ||
120 | return sizeof(struct sockaddr_in); | ||
121 | case AF_INET6: | ||
122 | return sizeof(struct sockaddr_in6); | ||
123 | } | ||
124 | return -EAFNOSUPPORT; | ||
125 | } | ||
126 | |||
127 | static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt) | ||
128 | { | ||
129 | return svc_addr_port((struct sockaddr *)&xprt->xpt_local); | ||
130 | } | ||
131 | |||
132 | static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt) | ||
133 | { | ||
134 | return svc_addr_port((struct sockaddr *)&xprt->xpt_remote); | ||
135 | } | ||
136 | |||
137 | static inline char *__svc_print_addr(struct sockaddr *addr, | ||
138 | char *buf, size_t len) | ||
139 | { | ||
140 | switch (addr->sa_family) { | ||
141 | case AF_INET: | ||
142 | snprintf(buf, len, "%u.%u.%u.%u, port=%u", | ||
143 | NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), | ||
144 | ntohs(((struct sockaddr_in *) addr)->sin_port)); | ||
145 | break; | ||
146 | |||
147 | case AF_INET6: | ||
148 | snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", | ||
149 | NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), | ||
150 | ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); | ||
151 | break; | ||
152 | |||
153 | default: | ||
154 | snprintf(buf, len, "unknown address type: %d", addr->sa_family); | ||
155 | break; | ||
156 | } | ||
157 | return buf; | ||
158 | } | ||
159 | #endif /* SUNRPC_SVC_XPRT_H */ | ||
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index a53e0fa855d2..206f092ad4c7 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h | |||
@@ -10,42 +10,16 @@ | |||
10 | #define SUNRPC_SVCSOCK_H | 10 | #define SUNRPC_SVCSOCK_H |
11 | 11 | ||
12 | #include <linux/sunrpc/svc.h> | 12 | #include <linux/sunrpc/svc.h> |
13 | #include <linux/sunrpc/svc_xprt.h> | ||
13 | 14 | ||
14 | /* | 15 | /* |
15 | * RPC server socket. | 16 | * RPC server socket. |
16 | */ | 17 | */ |
17 | struct svc_sock { | 18 | struct svc_sock { |
18 | struct list_head sk_ready; /* list of ready sockets */ | 19 | struct svc_xprt sk_xprt; |
19 | struct list_head sk_list; /* list of all sockets */ | ||
20 | struct socket * sk_sock; /* berkeley socket layer */ | 20 | struct socket * sk_sock; /* berkeley socket layer */ |
21 | struct sock * sk_sk; /* INET layer */ | 21 | struct sock * sk_sk; /* INET layer */ |
22 | 22 | ||
23 | struct svc_pool * sk_pool; /* current pool iff queued */ | ||
24 | struct svc_serv * sk_server; /* service for this socket */ | ||
25 | atomic_t sk_inuse; /* use count */ | ||
26 | unsigned long sk_flags; | ||
27 | #define SK_BUSY 0 /* enqueued/receiving */ | ||
28 | #define SK_CONN 1 /* conn pending */ | ||
29 | #define SK_CLOSE 2 /* dead or dying */ | ||
30 | #define SK_DATA 3 /* data pending */ | ||
31 | #define SK_TEMP 4 /* temp (TCP) socket */ | ||
32 | #define SK_DEAD 6 /* socket closed */ | ||
33 | #define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */ | ||
34 | #define SK_DEFERRED 8 /* request on sk_deferred */ | ||
35 | #define SK_OLD 9 /* used for temp socket aging mark+sweep */ | ||
36 | #define SK_DETACHED 10 /* detached from tempsocks list */ | ||
37 | |||
38 | atomic_t sk_reserved; /* space on outq that is reserved */ | ||
39 | |||
40 | spinlock_t sk_lock; /* protects sk_deferred and | ||
41 | * sk_info_authunix */ | ||
42 | struct list_head sk_deferred; /* deferred requests that need to | ||
43 | * be revisted */ | ||
44 | struct mutex sk_mutex; /* to serialize sending data */ | ||
45 | |||
46 | int (*sk_recvfrom)(struct svc_rqst *rqstp); | ||
47 | int (*sk_sendto)(struct svc_rqst *rqstp); | ||
48 | |||
49 | /* We keep the old state_change and data_ready CB's here */ | 23 | /* We keep the old state_change and data_ready CB's here */ |
50 | void (*sk_ostate)(struct sock *); | 24 | void (*sk_ostate)(struct sock *); |
51 | void (*sk_odata)(struct sock *, int bytes); | 25 | void (*sk_odata)(struct sock *, int bytes); |
@@ -54,21 +28,12 @@ struct svc_sock { | |||
54 | /* private TCP part */ | 28 | /* private TCP part */ |
55 | int sk_reclen; /* length of record */ | 29 | int sk_reclen; /* length of record */ |
56 | int sk_tcplen; /* current read length */ | 30 | int sk_tcplen; /* current read length */ |
57 | time_t sk_lastrecv; /* time of last received request */ | ||
58 | |||
59 | /* cache of various info for TCP sockets */ | ||
60 | void *sk_info_authunix; | ||
61 | |||
62 | struct sockaddr_storage sk_local; /* local address */ | ||
63 | struct sockaddr_storage sk_remote; /* remote peer's address */ | ||
64 | int sk_remotelen; /* length of address */ | ||
65 | }; | 31 | }; |
66 | 32 | ||
67 | /* | 33 | /* |
68 | * Function prototypes. | 34 | * Function prototypes. |
69 | */ | 35 | */ |
70 | int svc_makesock(struct svc_serv *, int, unsigned short, int flags); | 36 | void svc_close_all(struct list_head *); |
71 | void svc_force_close_socket(struct svc_sock *); | ||
72 | int svc_recv(struct svc_rqst *, long); | 37 | int svc_recv(struct svc_rqst *, long); |
73 | int svc_send(struct svc_rqst *); | 38 | int svc_send(struct svc_rqst *); |
74 | void svc_drop(struct svc_rqst *); | 39 | void svc_drop(struct svc_rqst *); |
@@ -78,6 +43,8 @@ int svc_addsock(struct svc_serv *serv, | |||
78 | int fd, | 43 | int fd, |
79 | char *name_return, | 44 | char *name_return, |
80 | int *proto); | 45 | int *proto); |
46 | void svc_init_xprt_sock(void); | ||
47 | void svc_cleanup_xprt_sock(void); | ||
81 | 48 | ||
82 | /* | 49 | /* |
83 | * svc_makesock socket characteristics | 50 | * svc_makesock socket characteristics |
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 0751c9464d0f..e4057d729f03 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h | |||
@@ -112,7 +112,8 @@ struct xdr_buf { | |||
112 | __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); | 112 | __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); |
113 | __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); | 113 | __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); |
114 | __be32 *xdr_encode_string(__be32 *p, const char *s); | 114 | __be32 *xdr_encode_string(__be32 *p, const char *s); |
115 | __be32 *xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen); | 115 | __be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, |
116 | unsigned int maxlen); | ||
116 | __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); | 117 | __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); |
117 | __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); | 118 | __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); |
118 | 119 | ||
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 5c69a725e530..92e1dbe50947 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
@@ -11,6 +11,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ | |||
11 | auth.o auth_null.o auth_unix.o \ | 11 | auth.o auth_null.o auth_unix.o \ |
12 | svc.o svcsock.o svcauth.o svcauth_unix.o \ | 12 | svc.o svcsock.o svcauth.o svcauth_unix.o \ |
13 | rpcb_clnt.o timer.o xdr.o \ | 13 | rpcb_clnt.o timer.o xdr.o \ |
14 | sunrpc_syms.o cache.o rpc_pipe.o | 14 | sunrpc_syms.o cache.o rpc_pipe.o \ |
15 | svc_xprt.o | ||
15 | sunrpc-$(CONFIG_PROC_FS) += stats.o | 16 | sunrpc-$(CONFIG_PROC_FS) += stats.o |
16 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o | 17 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o |
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73940df6c460..481f984e9a22 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
@@ -224,38 +224,34 @@ static int rsi_parse(struct cache_detail *cd, | |||
224 | 224 | ||
225 | /* major/minor */ | 225 | /* major/minor */ |
226 | len = qword_get(&mesg, buf, mlen); | 226 | len = qword_get(&mesg, buf, mlen); |
227 | if (len < 0) | 227 | if (len <= 0) |
228 | goto out; | 228 | goto out; |
229 | if (len == 0) { | 229 | rsii.major_status = simple_strtoul(buf, &ep, 10); |
230 | if (*ep) | ||
231 | goto out; | ||
232 | len = qword_get(&mesg, buf, mlen); | ||
233 | if (len <= 0) | ||
234 | goto out; | ||
235 | rsii.minor_status = simple_strtoul(buf, &ep, 10); | ||
236 | if (*ep) | ||
230 | goto out; | 237 | goto out; |
231 | } else { | ||
232 | rsii.major_status = simple_strtoul(buf, &ep, 10); | ||
233 | if (*ep) | ||
234 | goto out; | ||
235 | len = qword_get(&mesg, buf, mlen); | ||
236 | if (len <= 0) | ||
237 | goto out; | ||
238 | rsii.minor_status = simple_strtoul(buf, &ep, 10); | ||
239 | if (*ep) | ||
240 | goto out; | ||
241 | 238 | ||
242 | /* out_handle */ | 239 | /* out_handle */ |
243 | len = qword_get(&mesg, buf, mlen); | 240 | len = qword_get(&mesg, buf, mlen); |
244 | if (len < 0) | 241 | if (len < 0) |
245 | goto out; | 242 | goto out; |
246 | status = -ENOMEM; | 243 | status = -ENOMEM; |
247 | if (dup_to_netobj(&rsii.out_handle, buf, len)) | 244 | if (dup_to_netobj(&rsii.out_handle, buf, len)) |
248 | goto out; | 245 | goto out; |
249 | 246 | ||
250 | /* out_token */ | 247 | /* out_token */ |
251 | len = qword_get(&mesg, buf, mlen); | 248 | len = qword_get(&mesg, buf, mlen); |
252 | status = -EINVAL; | 249 | status = -EINVAL; |
253 | if (len < 0) | 250 | if (len < 0) |
254 | goto out; | 251 | goto out; |
255 | status = -ENOMEM; | 252 | status = -ENOMEM; |
256 | if (dup_to_netobj(&rsii.out_token, buf, len)) | 253 | if (dup_to_netobj(&rsii.out_token, buf, len)) |
257 | goto out; | 254 | goto out; |
258 | } | ||
259 | rsii.h.expiry_time = expiry; | 255 | rsii.h.expiry_time = expiry; |
260 | rsip = rsi_update(&rsii, rsip); | 256 | rsip = rsi_update(&rsii, rsip); |
261 | status = 0; | 257 | status = 0; |
@@ -975,6 +971,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, | |||
975 | struct kvec *resv = &rqstp->rq_res.head[0]; | 971 | struct kvec *resv = &rqstp->rq_res.head[0]; |
976 | struct xdr_netobj tmpobj; | 972 | struct xdr_netobj tmpobj; |
977 | struct rsi *rsip, rsikey; | 973 | struct rsi *rsip, rsikey; |
974 | int ret; | ||
978 | 975 | ||
979 | /* Read the verifier; should be NULL: */ | 976 | /* Read the verifier; should be NULL: */ |
980 | *authp = rpc_autherr_badverf; | 977 | *authp = rpc_autherr_badverf; |
@@ -1014,23 +1011,27 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, | |||
1014 | /* No upcall result: */ | 1011 | /* No upcall result: */ |
1015 | return SVC_DROP; | 1012 | return SVC_DROP; |
1016 | case 0: | 1013 | case 0: |
1014 | ret = SVC_DROP; | ||
1017 | /* Got an answer to the upcall; use it: */ | 1015 | /* Got an answer to the upcall; use it: */ |
1018 | if (gss_write_init_verf(rqstp, rsip)) | 1016 | if (gss_write_init_verf(rqstp, rsip)) |
1019 | return SVC_DROP; | 1017 | goto out; |
1020 | if (resv->iov_len + 4 > PAGE_SIZE) | 1018 | if (resv->iov_len + 4 > PAGE_SIZE) |
1021 | return SVC_DROP; | 1019 | goto out; |
1022 | svc_putnl(resv, RPC_SUCCESS); | 1020 | svc_putnl(resv, RPC_SUCCESS); |
1023 | if (svc_safe_putnetobj(resv, &rsip->out_handle)) | 1021 | if (svc_safe_putnetobj(resv, &rsip->out_handle)) |
1024 | return SVC_DROP; | 1022 | goto out; |
1025 | if (resv->iov_len + 3 * 4 > PAGE_SIZE) | 1023 | if (resv->iov_len + 3 * 4 > PAGE_SIZE) |
1026 | return SVC_DROP; | 1024 | goto out; |
1027 | svc_putnl(resv, rsip->major_status); | 1025 | svc_putnl(resv, rsip->major_status); |
1028 | svc_putnl(resv, rsip->minor_status); | 1026 | svc_putnl(resv, rsip->minor_status); |
1029 | svc_putnl(resv, GSS_SEQ_WIN); | 1027 | svc_putnl(resv, GSS_SEQ_WIN); |
1030 | if (svc_safe_putnetobj(resv, &rsip->out_token)) | 1028 | if (svc_safe_putnetobj(resv, &rsip->out_token)) |
1031 | return SVC_DROP; | 1029 | goto out; |
1032 | } | 1030 | } |
1033 | return SVC_COMPLETE; | 1031 | ret = SVC_COMPLETE; |
1032 | out: | ||
1033 | cache_put(&rsip->h, &rsi_cache); | ||
1034 | return ret; | ||
1034 | } | 1035 | } |
1035 | 1036 | ||
1036 | /* | 1037 | /* |
@@ -1125,6 +1126,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
1125 | case RPC_GSS_PROC_DESTROY: | 1126 | case RPC_GSS_PROC_DESTROY: |
1126 | if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) | 1127 | if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) |
1127 | goto auth_err; | 1128 | goto auth_err; |
1129 | rsci->h.expiry_time = get_seconds(); | ||
1128 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); | 1130 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); |
1129 | if (resv->iov_len + 4 > PAGE_SIZE) | 1131 | if (resv->iov_len + 4 > PAGE_SIZE) |
1130 | goto drop; | 1132 | goto drop; |
@@ -1386,19 +1388,26 @@ int | |||
1386 | gss_svc_init(void) | 1388 | gss_svc_init(void) |
1387 | { | 1389 | { |
1388 | int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); | 1390 | int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); |
1389 | if (rv == 0) { | 1391 | if (rv) |
1390 | cache_register(&rsc_cache); | 1392 | return rv; |
1391 | cache_register(&rsi_cache); | 1393 | rv = cache_register(&rsc_cache); |
1392 | } | 1394 | if (rv) |
1395 | goto out1; | ||
1396 | rv = cache_register(&rsi_cache); | ||
1397 | if (rv) | ||
1398 | goto out2; | ||
1399 | return 0; | ||
1400 | out2: | ||
1401 | cache_unregister(&rsc_cache); | ||
1402 | out1: | ||
1403 | svc_auth_unregister(RPC_AUTH_GSS); | ||
1393 | return rv; | 1404 | return rv; |
1394 | } | 1405 | } |
1395 | 1406 | ||
1396 | void | 1407 | void |
1397 | gss_svc_shutdown(void) | 1408 | gss_svc_shutdown(void) |
1398 | { | 1409 | { |
1399 | if (cache_unregister(&rsc_cache)) | 1410 | cache_unregister(&rsc_cache); |
1400 | printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n"); | 1411 | cache_unregister(&rsi_cache); |
1401 | if (cache_unregister(&rsi_cache)) | ||
1402 | printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n"); | ||
1403 | svc_auth_unregister(RPC_AUTH_GSS); | 1412 | svc_auth_unregister(RPC_AUTH_GSS); |
1404 | } | 1413 | } |
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 73f053d0cc7a..636c8e04e0be 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c | |||
@@ -245,6 +245,7 @@ int cache_check(struct cache_detail *detail, | |||
245 | cache_put(h, detail); | 245 | cache_put(h, detail); |
246 | return rv; | 246 | return rv; |
247 | } | 247 | } |
248 | EXPORT_SYMBOL(cache_check); | ||
248 | 249 | ||
249 | /* | 250 | /* |
250 | * caches need to be periodically cleaned. | 251 | * caches need to be periodically cleaned. |
@@ -290,44 +291,78 @@ static const struct file_operations cache_flush_operations; | |||
290 | static void do_cache_clean(struct work_struct *work); | 291 | static void do_cache_clean(struct work_struct *work); |
291 | static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); | 292 | static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); |
292 | 293 | ||
293 | void cache_register(struct cache_detail *cd) | 294 | static void remove_cache_proc_entries(struct cache_detail *cd) |
294 | { | 295 | { |
295 | cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); | 296 | if (cd->proc_ent == NULL) |
296 | if (cd->proc_ent) { | 297 | return; |
297 | struct proc_dir_entry *p; | 298 | if (cd->flush_ent) |
298 | cd->proc_ent->owner = cd->owner; | 299 | remove_proc_entry("flush", cd->proc_ent); |
299 | cd->channel_ent = cd->content_ent = NULL; | 300 | if (cd->channel_ent) |
301 | remove_proc_entry("channel", cd->proc_ent); | ||
302 | if (cd->content_ent) | ||
303 | remove_proc_entry("content", cd->proc_ent); | ||
304 | cd->proc_ent = NULL; | ||
305 | remove_proc_entry(cd->name, proc_net_rpc); | ||
306 | } | ||
300 | 307 | ||
301 | p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, | 308 | #ifdef CONFIG_PROC_FS |
302 | cd->proc_ent); | 309 | static int create_cache_proc_entries(struct cache_detail *cd) |
303 | cd->flush_ent = p; | 310 | { |
304 | if (p) { | 311 | struct proc_dir_entry *p; |
305 | p->proc_fops = &cache_flush_operations; | ||
306 | p->owner = cd->owner; | ||
307 | p->data = cd; | ||
308 | } | ||
309 | 312 | ||
310 | if (cd->cache_request || cd->cache_parse) { | 313 | cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); |
311 | p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, | 314 | if (cd->proc_ent == NULL) |
312 | cd->proc_ent); | 315 | goto out_nomem; |
313 | cd->channel_ent = p; | 316 | cd->proc_ent->owner = cd->owner; |
314 | if (p) { | 317 | cd->channel_ent = cd->content_ent = NULL; |
315 | p->proc_fops = &cache_file_operations; | 318 | |
316 | p->owner = cd->owner; | 319 | p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent); |
317 | p->data = cd; | 320 | cd->flush_ent = p; |
318 | } | 321 | if (p == NULL) |
319 | } | 322 | goto out_nomem; |
320 | if (cd->cache_show) { | 323 | p->proc_fops = &cache_flush_operations; |
321 | p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR, | 324 | p->owner = cd->owner; |
322 | cd->proc_ent); | 325 | p->data = cd; |
323 | cd->content_ent = p; | 326 | |
324 | if (p) { | 327 | if (cd->cache_request || cd->cache_parse) { |
325 | p->proc_fops = &content_file_operations; | 328 | p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, |
326 | p->owner = cd->owner; | 329 | cd->proc_ent); |
327 | p->data = cd; | 330 | cd->channel_ent = p; |
328 | } | 331 | if (p == NULL) |
329 | } | 332 | goto out_nomem; |
333 | p->proc_fops = &cache_file_operations; | ||
334 | p->owner = cd->owner; | ||
335 | p->data = cd; | ||
330 | } | 336 | } |
337 | if (cd->cache_show) { | ||
338 | p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR, | ||
339 | cd->proc_ent); | ||
340 | cd->content_ent = p; | ||
341 | if (p == NULL) | ||
342 | goto out_nomem; | ||
343 | p->proc_fops = &content_file_operations; | ||
344 | p->owner = cd->owner; | ||
345 | p->data = cd; | ||
346 | } | ||
347 | return 0; | ||
348 | out_nomem: | ||
349 | remove_cache_proc_entries(cd); | ||
350 | return -ENOMEM; | ||
351 | } | ||
352 | #else /* CONFIG_PROC_FS */ | ||
353 | static int create_cache_proc_entries(struct cache_detail *cd) | ||
354 | { | ||
355 | return 0; | ||
356 | } | ||
357 | #endif | ||
358 | |||
359 | int cache_register(struct cache_detail *cd) | ||
360 | { | ||
361 | int ret; | ||
362 | |||
363 | ret = create_cache_proc_entries(cd); | ||
364 | if (ret) | ||
365 | return ret; | ||
331 | rwlock_init(&cd->hash_lock); | 366 | rwlock_init(&cd->hash_lock); |
332 | INIT_LIST_HEAD(&cd->queue); | 367 | INIT_LIST_HEAD(&cd->queue); |
333 | spin_lock(&cache_list_lock); | 368 | spin_lock(&cache_list_lock); |
@@ -341,9 +376,11 @@ void cache_register(struct cache_detail *cd) | |||
341 | 376 | ||
342 | /* start the cleaning process */ | 377 | /* start the cleaning process */ |
343 | schedule_delayed_work(&cache_cleaner, 0); | 378 | schedule_delayed_work(&cache_cleaner, 0); |
379 | return 0; | ||
344 | } | 380 | } |
381 | EXPORT_SYMBOL(cache_register); | ||
345 | 382 | ||
346 | int cache_unregister(struct cache_detail *cd) | 383 | void cache_unregister(struct cache_detail *cd) |
347 | { | 384 | { |
348 | cache_purge(cd); | 385 | cache_purge(cd); |
349 | spin_lock(&cache_list_lock); | 386 | spin_lock(&cache_list_lock); |
@@ -351,30 +388,23 @@ int cache_unregister(struct cache_detail *cd) | |||
351 | if (cd->entries || atomic_read(&cd->inuse)) { | 388 | if (cd->entries || atomic_read(&cd->inuse)) { |
352 | write_unlock(&cd->hash_lock); | 389 | write_unlock(&cd->hash_lock); |
353 | spin_unlock(&cache_list_lock); | 390 | spin_unlock(&cache_list_lock); |
354 | return -EBUSY; | 391 | goto out; |
355 | } | 392 | } |
356 | if (current_detail == cd) | 393 | if (current_detail == cd) |
357 | current_detail = NULL; | 394 | current_detail = NULL; |
358 | list_del_init(&cd->others); | 395 | list_del_init(&cd->others); |
359 | write_unlock(&cd->hash_lock); | 396 | write_unlock(&cd->hash_lock); |
360 | spin_unlock(&cache_list_lock); | 397 | spin_unlock(&cache_list_lock); |
361 | if (cd->proc_ent) { | 398 | remove_cache_proc_entries(cd); |
362 | if (cd->flush_ent) | ||
363 | remove_proc_entry("flush", cd->proc_ent); | ||
364 | if (cd->channel_ent) | ||
365 | remove_proc_entry("channel", cd->proc_ent); | ||
366 | if (cd->content_ent) | ||
367 | remove_proc_entry("content", cd->proc_ent); | ||
368 | |||
369 | cd->proc_ent = NULL; | ||
370 | remove_proc_entry(cd->name, proc_net_rpc); | ||
371 | } | ||
372 | if (list_empty(&cache_list)) { | 399 | if (list_empty(&cache_list)) { |
373 | /* module must be being unloaded so its safe to kill the worker */ | 400 | /* module must be being unloaded so its safe to kill the worker */ |
374 | cancel_delayed_work_sync(&cache_cleaner); | 401 | cancel_delayed_work_sync(&cache_cleaner); |
375 | } | 402 | } |
376 | return 0; | 403 | return; |
404 | out: | ||
405 | printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); | ||
377 | } | 406 | } |
407 | EXPORT_SYMBOL(cache_unregister); | ||
378 | 408 | ||
379 | /* clean cache tries to find something to clean | 409 | /* clean cache tries to find something to clean |
380 | * and cleans it. | 410 | * and cleans it. |
@@ -489,6 +519,7 @@ void cache_flush(void) | |||
489 | while (cache_clean() != -1) | 519 | while (cache_clean() != -1) |
490 | cond_resched(); | 520 | cond_resched(); |
491 | } | 521 | } |
522 | EXPORT_SYMBOL(cache_flush); | ||
492 | 523 | ||
493 | void cache_purge(struct cache_detail *detail) | 524 | void cache_purge(struct cache_detail *detail) |
494 | { | 525 | { |
@@ -497,7 +528,7 @@ void cache_purge(struct cache_detail *detail) | |||
497 | cache_flush(); | 528 | cache_flush(); |
498 | detail->flush_time = 1; | 529 | detail->flush_time = 1; |
499 | } | 530 | } |
500 | 531 | EXPORT_SYMBOL(cache_purge); | |
501 | 532 | ||
502 | 533 | ||
503 | /* | 534 | /* |
@@ -634,13 +665,13 @@ void cache_clean_deferred(void *owner) | |||
634 | /* | 665 | /* |
635 | * communicate with user-space | 666 | * communicate with user-space |
636 | * | 667 | * |
637 | * We have a magic /proc file - /proc/sunrpc/cache | 668 | * We have a magic /proc file - /proc/sunrpc/<cachename>/channel. |
638 | * On read, you get a full request, or block | 669 | * On read, you get a full request, or block. |
639 | * On write, an update request is processed | 670 | * On write, an update request is processed. |
640 | * Poll works if anything to read, and always allows write | 671 | * Poll works if anything to read, and always allows write. |
641 | * | 672 | * |
642 | * Implemented by linked list of requests. Each open file has | 673 | * Implemented by linked list of requests. Each open file has |
643 | * a ->private that also exists in this list. New request are added | 674 | * a ->private that also exists in this list. New requests are added |
644 | * to the end and may wakeup and preceding readers. | 675 | * to the end and may wakeup and preceding readers. |
645 | * New readers are added to the head. If, on read, an item is found with | 676 | * New readers are added to the head. If, on read, an item is found with |
646 | * CACHE_UPCALLING clear, we free it from the list. | 677 | * CACHE_UPCALLING clear, we free it from the list. |
@@ -963,6 +994,7 @@ void qword_add(char **bpp, int *lp, char *str) | |||
963 | *bpp = bp; | 994 | *bpp = bp; |
964 | *lp = len; | 995 | *lp = len; |
965 | } | 996 | } |
997 | EXPORT_SYMBOL(qword_add); | ||
966 | 998 | ||
967 | void qword_addhex(char **bpp, int *lp, char *buf, int blen) | 999 | void qword_addhex(char **bpp, int *lp, char *buf, int blen) |
968 | { | 1000 | { |
@@ -991,6 +1023,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen) | |||
991 | *bpp = bp; | 1023 | *bpp = bp; |
992 | *lp = len; | 1024 | *lp = len; |
993 | } | 1025 | } |
1026 | EXPORT_SYMBOL(qword_addhex); | ||
994 | 1027 | ||
995 | static void warn_no_listener(struct cache_detail *detail) | 1028 | static void warn_no_listener(struct cache_detail *detail) |
996 | { | 1029 | { |
@@ -1113,6 +1146,7 @@ int qword_get(char **bpp, char *dest, int bufsize) | |||
1113 | *dest = '\0'; | 1146 | *dest = '\0'; |
1114 | return len; | 1147 | return len; |
1115 | } | 1148 | } |
1149 | EXPORT_SYMBOL(qword_get); | ||
1116 | 1150 | ||
1117 | 1151 | ||
1118 | /* | 1152 | /* |
@@ -1244,18 +1278,18 @@ static ssize_t read_flush(struct file *file, char __user *buf, | |||
1244 | struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; | 1278 | struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; |
1245 | char tbuf[20]; | 1279 | char tbuf[20]; |
1246 | unsigned long p = *ppos; | 1280 | unsigned long p = *ppos; |
1247 | int len; | 1281 | size_t len; |
1248 | 1282 | ||
1249 | sprintf(tbuf, "%lu\n", cd->flush_time); | 1283 | sprintf(tbuf, "%lu\n", cd->flush_time); |
1250 | len = strlen(tbuf); | 1284 | len = strlen(tbuf); |
1251 | if (p >= len) | 1285 | if (p >= len) |
1252 | return 0; | 1286 | return 0; |
1253 | len -= p; | 1287 | len -= p; |
1254 | if (len > count) len = count; | 1288 | if (len > count) |
1289 | len = count; | ||
1255 | if (copy_to_user(buf, (void*)(tbuf+p), len)) | 1290 | if (copy_to_user(buf, (void*)(tbuf+p), len)) |
1256 | len = -EFAULT; | 1291 | return -EFAULT; |
1257 | else | 1292 | *ppos += len; |
1258 | *ppos += len; | ||
1259 | return len; | 1293 | return len; |
1260 | } | 1294 | } |
1261 | 1295 | ||
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 74df2d358e61..5a16875f5ac8 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c | |||
@@ -33,7 +33,7 @@ struct proc_dir_entry *proc_net_rpc = NULL; | |||
33 | static int rpc_proc_show(struct seq_file *seq, void *v) { | 33 | static int rpc_proc_show(struct seq_file *seq, void *v) { |
34 | const struct rpc_stat *statp = seq->private; | 34 | const struct rpc_stat *statp = seq->private; |
35 | const struct rpc_program *prog = statp->program; | 35 | const struct rpc_program *prog = statp->program; |
36 | int i, j; | 36 | unsigned int i, j; |
37 | 37 | ||
38 | seq_printf(seq, | 38 | seq_printf(seq, |
39 | "net %u %u %u %u\n", | 39 | "net %u %u %u %u\n", |
@@ -81,7 +81,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { | |||
81 | const struct svc_program *prog = statp->program; | 81 | const struct svc_program *prog = statp->program; |
82 | const struct svc_procedure *proc; | 82 | const struct svc_procedure *proc; |
83 | const struct svc_version *vers; | 83 | const struct svc_version *vers; |
84 | int i, j; | 84 | unsigned int i, j; |
85 | 85 | ||
86 | seq_printf(seq, | 86 | seq_printf(seq, |
87 | "net %u %u %u %u\n", | 87 | "net %u %u %u %u\n", |
@@ -106,6 +106,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { | |||
106 | seq_putc(seq, '\n'); | 106 | seq_putc(seq, '\n'); |
107 | } | 107 | } |
108 | } | 108 | } |
109 | EXPORT_SYMBOL(svc_seq_show); | ||
109 | 110 | ||
110 | /** | 111 | /** |
111 | * rpc_alloc_iostats - allocate an rpc_iostats structure | 112 | * rpc_alloc_iostats - allocate an rpc_iostats structure |
@@ -255,12 +256,14 @@ svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) | |||
255 | { | 256 | { |
256 | return do_register(statp->program->pg_name, statp, fops); | 257 | return do_register(statp->program->pg_name, statp, fops); |
257 | } | 258 | } |
259 | EXPORT_SYMBOL(svc_proc_register); | ||
258 | 260 | ||
259 | void | 261 | void |
260 | svc_proc_unregister(const char *name) | 262 | svc_proc_unregister(const char *name) |
261 | { | 263 | { |
262 | remove_proc_entry(name, proc_net_rpc); | 264 | remove_proc_entry(name, proc_net_rpc); |
263 | } | 265 | } |
266 | EXPORT_SYMBOL(svc_proc_unregister); | ||
264 | 267 | ||
265 | void | 268 | void |
266 | rpc_proc_init(void) | 269 | rpc_proc_init(void) |
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 1a7e309d008b..843629f55763 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -22,48 +22,6 @@ | |||
22 | #include <linux/sunrpc/rpc_pipe_fs.h> | 22 | #include <linux/sunrpc/rpc_pipe_fs.h> |
23 | #include <linux/sunrpc/xprtsock.h> | 23 | #include <linux/sunrpc/xprtsock.h> |
24 | 24 | ||
25 | /* RPC server stuff */ | ||
26 | EXPORT_SYMBOL(svc_create); | ||
27 | EXPORT_SYMBOL(svc_create_thread); | ||
28 | EXPORT_SYMBOL(svc_create_pooled); | ||
29 | EXPORT_SYMBOL(svc_set_num_threads); | ||
30 | EXPORT_SYMBOL(svc_exit_thread); | ||
31 | EXPORT_SYMBOL(svc_destroy); | ||
32 | EXPORT_SYMBOL(svc_drop); | ||
33 | EXPORT_SYMBOL(svc_process); | ||
34 | EXPORT_SYMBOL(svc_recv); | ||
35 | EXPORT_SYMBOL(svc_wake_up); | ||
36 | EXPORT_SYMBOL(svc_makesock); | ||
37 | EXPORT_SYMBOL(svc_reserve); | ||
38 | EXPORT_SYMBOL(svc_auth_register); | ||
39 | EXPORT_SYMBOL(auth_domain_lookup); | ||
40 | EXPORT_SYMBOL(svc_authenticate); | ||
41 | EXPORT_SYMBOL(svc_set_client); | ||
42 | |||
43 | /* RPC statistics */ | ||
44 | #ifdef CONFIG_PROC_FS | ||
45 | EXPORT_SYMBOL(svc_proc_register); | ||
46 | EXPORT_SYMBOL(svc_proc_unregister); | ||
47 | EXPORT_SYMBOL(svc_seq_show); | ||
48 | #endif | ||
49 | |||
50 | /* caching... */ | ||
51 | EXPORT_SYMBOL(auth_domain_find); | ||
52 | EXPORT_SYMBOL(auth_domain_put); | ||
53 | EXPORT_SYMBOL(auth_unix_add_addr); | ||
54 | EXPORT_SYMBOL(auth_unix_forget_old); | ||
55 | EXPORT_SYMBOL(auth_unix_lookup); | ||
56 | EXPORT_SYMBOL(cache_check); | ||
57 | EXPORT_SYMBOL(cache_flush); | ||
58 | EXPORT_SYMBOL(cache_purge); | ||
59 | EXPORT_SYMBOL(cache_register); | ||
60 | EXPORT_SYMBOL(cache_unregister); | ||
61 | EXPORT_SYMBOL(qword_add); | ||
62 | EXPORT_SYMBOL(qword_addhex); | ||
63 | EXPORT_SYMBOL(qword_get); | ||
64 | EXPORT_SYMBOL(svcauth_unix_purge); | ||
65 | EXPORT_SYMBOL(unix_domain_find); | ||
66 | |||
67 | extern struct cache_detail ip_map_cache, unix_gid_cache; | 25 | extern struct cache_detail ip_map_cache, unix_gid_cache; |
68 | 26 | ||
69 | static int __init | 27 | static int __init |
@@ -85,7 +43,8 @@ init_sunrpc(void) | |||
85 | #endif | 43 | #endif |
86 | cache_register(&ip_map_cache); | 44 | cache_register(&ip_map_cache); |
87 | cache_register(&unix_gid_cache); | 45 | cache_register(&unix_gid_cache); |
88 | init_socket_xprt(); | 46 | svc_init_xprt_sock(); /* svc sock transport */ |
47 | init_socket_xprt(); /* clnt sock transport */ | ||
89 | rpcauth_init_module(); | 48 | rpcauth_init_module(); |
90 | out: | 49 | out: |
91 | return err; | 50 | return err; |
@@ -96,12 +55,11 @@ cleanup_sunrpc(void) | |||
96 | { | 55 | { |
97 | rpcauth_remove_module(); | 56 | rpcauth_remove_module(); |
98 | cleanup_socket_xprt(); | 57 | cleanup_socket_xprt(); |
58 | svc_cleanup_xprt_sock(); | ||
99 | unregister_rpc_pipefs(); | 59 | unregister_rpc_pipefs(); |
100 | rpc_destroy_mempool(); | 60 | rpc_destroy_mempool(); |
101 | if (cache_unregister(&ip_map_cache)) | 61 | cache_unregister(&ip_map_cache); |
102 | printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); | 62 | cache_unregister(&unix_gid_cache); |
103 | if (cache_unregister(&unix_gid_cache)) | ||
104 | printk(KERN_ERR "sunrpc: failed to unregister unix_gid cache\n"); | ||
105 | #ifdef RPC_DEBUG | 63 | #ifdef RPC_DEBUG |
106 | rpc_unregister_sysctl(); | 64 | rpc_unregister_sysctl(); |
107 | #endif | 65 | #endif |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 4ad5fbbb18b4..a290e1523297 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
@@ -364,7 +364,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
364 | void (*shutdown)(struct svc_serv *serv)) | 364 | void (*shutdown)(struct svc_serv *serv)) |
365 | { | 365 | { |
366 | struct svc_serv *serv; | 366 | struct svc_serv *serv; |
367 | int vers; | 367 | unsigned int vers; |
368 | unsigned int xdrsize; | 368 | unsigned int xdrsize; |
369 | unsigned int i; | 369 | unsigned int i; |
370 | 370 | ||
@@ -433,6 +433,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, | |||
433 | { | 433 | { |
434 | return __svc_create(prog, bufsize, /*npools*/1, shutdown); | 434 | return __svc_create(prog, bufsize, /*npools*/1, shutdown); |
435 | } | 435 | } |
436 | EXPORT_SYMBOL(svc_create); | ||
436 | 437 | ||
437 | struct svc_serv * | 438 | struct svc_serv * |
438 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | 439 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, |
@@ -452,6 +453,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | |||
452 | 453 | ||
453 | return serv; | 454 | return serv; |
454 | } | 455 | } |
456 | EXPORT_SYMBOL(svc_create_pooled); | ||
455 | 457 | ||
456 | /* | 458 | /* |
457 | * Destroy an RPC service. Should be called with the BKL held | 459 | * Destroy an RPC service. Should be called with the BKL held |
@@ -459,9 +461,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | |||
459 | void | 461 | void |
460 | svc_destroy(struct svc_serv *serv) | 462 | svc_destroy(struct svc_serv *serv) |
461 | { | 463 | { |
462 | struct svc_sock *svsk; | ||
463 | struct svc_sock *tmp; | ||
464 | |||
465 | dprintk("svc: svc_destroy(%s, %d)\n", | 464 | dprintk("svc: svc_destroy(%s, %d)\n", |
466 | serv->sv_program->pg_name, | 465 | serv->sv_program->pg_name, |
467 | serv->sv_nrthreads); | 466 | serv->sv_nrthreads); |
@@ -476,14 +475,12 @@ svc_destroy(struct svc_serv *serv) | |||
476 | 475 | ||
477 | del_timer_sync(&serv->sv_temptimer); | 476 | del_timer_sync(&serv->sv_temptimer); |
478 | 477 | ||
479 | list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list) | 478 | svc_close_all(&serv->sv_tempsocks); |
480 | svc_force_close_socket(svsk); | ||
481 | 479 | ||
482 | if (serv->sv_shutdown) | 480 | if (serv->sv_shutdown) |
483 | serv->sv_shutdown(serv); | 481 | serv->sv_shutdown(serv); |
484 | 482 | ||
485 | list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list) | 483 | svc_close_all(&serv->sv_permsocks); |
486 | svc_force_close_socket(svsk); | ||
487 | 484 | ||
488 | BUG_ON(!list_empty(&serv->sv_permsocks)); | 485 | BUG_ON(!list_empty(&serv->sv_permsocks)); |
489 | BUG_ON(!list_empty(&serv->sv_tempsocks)); | 486 | BUG_ON(!list_empty(&serv->sv_tempsocks)); |
@@ -498,6 +495,7 @@ svc_destroy(struct svc_serv *serv) | |||
498 | kfree(serv->sv_pools); | 495 | kfree(serv->sv_pools); |
499 | kfree(serv); | 496 | kfree(serv); |
500 | } | 497 | } |
498 | EXPORT_SYMBOL(svc_destroy); | ||
501 | 499 | ||
502 | /* | 500 | /* |
503 | * Allocate an RPC server's buffer space. | 501 | * Allocate an RPC server's buffer space. |
@@ -536,31 +534,17 @@ svc_release_buffer(struct svc_rqst *rqstp) | |||
536 | put_page(rqstp->rq_pages[i]); | 534 | put_page(rqstp->rq_pages[i]); |
537 | } | 535 | } |
538 | 536 | ||
539 | /* | 537 | struct svc_rqst * |
540 | * Create a thread in the given pool. Caller must hold BKL. | 538 | svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool) |
541 | * On a NUMA or SMP machine, with a multi-pool serv, the thread | ||
542 | * will be restricted to run on the cpus belonging to the pool. | ||
543 | */ | ||
544 | static int | ||
545 | __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | ||
546 | struct svc_pool *pool) | ||
547 | { | 539 | { |
548 | struct svc_rqst *rqstp; | 540 | struct svc_rqst *rqstp; |
549 | int error = -ENOMEM; | ||
550 | int have_oldmask = 0; | ||
551 | cpumask_t oldmask; | ||
552 | 541 | ||
553 | rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); | 542 | rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); |
554 | if (!rqstp) | 543 | if (!rqstp) |
555 | goto out; | 544 | goto out_enomem; |
556 | 545 | ||
557 | init_waitqueue_head(&rqstp->rq_wait); | 546 | init_waitqueue_head(&rqstp->rq_wait); |
558 | 547 | ||
559 | if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) | ||
560 | || !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) | ||
561 | || !svc_init_buffer(rqstp, serv->sv_max_mesg)) | ||
562 | goto out_thread; | ||
563 | |||
564 | serv->sv_nrthreads++; | 548 | serv->sv_nrthreads++; |
565 | spin_lock_bh(&pool->sp_lock); | 549 | spin_lock_bh(&pool->sp_lock); |
566 | pool->sp_nrthreads++; | 550 | pool->sp_nrthreads++; |
@@ -569,6 +553,45 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | |||
569 | rqstp->rq_server = serv; | 553 | rqstp->rq_server = serv; |
570 | rqstp->rq_pool = pool; | 554 | rqstp->rq_pool = pool; |
571 | 555 | ||
556 | rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); | ||
557 | if (!rqstp->rq_argp) | ||
558 | goto out_thread; | ||
559 | |||
560 | rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); | ||
561 | if (!rqstp->rq_resp) | ||
562 | goto out_thread; | ||
563 | |||
564 | if (!svc_init_buffer(rqstp, serv->sv_max_mesg)) | ||
565 | goto out_thread; | ||
566 | |||
567 | return rqstp; | ||
568 | out_thread: | ||
569 | svc_exit_thread(rqstp); | ||
570 | out_enomem: | ||
571 | return ERR_PTR(-ENOMEM); | ||
572 | } | ||
573 | EXPORT_SYMBOL(svc_prepare_thread); | ||
574 | |||
575 | /* | ||
576 | * Create a thread in the given pool. Caller must hold BKL. | ||
577 | * On a NUMA or SMP machine, with a multi-pool serv, the thread | ||
578 | * will be restricted to run on the cpus belonging to the pool. | ||
579 | */ | ||
580 | static int | ||
581 | __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | ||
582 | struct svc_pool *pool) | ||
583 | { | ||
584 | struct svc_rqst *rqstp; | ||
585 | int error = -ENOMEM; | ||
586 | int have_oldmask = 0; | ||
587 | cpumask_t oldmask; | ||
588 | |||
589 | rqstp = svc_prepare_thread(serv, pool); | ||
590 | if (IS_ERR(rqstp)) { | ||
591 | error = PTR_ERR(rqstp); | ||
592 | goto out; | ||
593 | } | ||
594 | |||
572 | if (serv->sv_nrpools > 1) | 595 | if (serv->sv_nrpools > 1) |
573 | have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); | 596 | have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); |
574 | 597 | ||
@@ -597,6 +620,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv) | |||
597 | { | 620 | { |
598 | return __svc_create_thread(func, serv, &serv->sv_pools[0]); | 621 | return __svc_create_thread(func, serv, &serv->sv_pools[0]); |
599 | } | 622 | } |
623 | EXPORT_SYMBOL(svc_create_thread); | ||
600 | 624 | ||
601 | /* | 625 | /* |
602 | * Choose a pool in which to create a new thread, for svc_set_num_threads | 626 | * Choose a pool in which to create a new thread, for svc_set_num_threads |
@@ -700,6 +724,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) | |||
700 | 724 | ||
701 | return error; | 725 | return error; |
702 | } | 726 | } |
727 | EXPORT_SYMBOL(svc_set_num_threads); | ||
703 | 728 | ||
704 | /* | 729 | /* |
705 | * Called from a server thread as it's exiting. Caller must hold BKL. | 730 | * Called from a server thread as it's exiting. Caller must hold BKL. |
@@ -726,6 +751,7 @@ svc_exit_thread(struct svc_rqst *rqstp) | |||
726 | if (serv) | 751 | if (serv) |
727 | svc_destroy(serv); | 752 | svc_destroy(serv); |
728 | } | 753 | } |
754 | EXPORT_SYMBOL(svc_exit_thread); | ||
729 | 755 | ||
730 | /* | 756 | /* |
731 | * Register an RPC service with the local portmapper. | 757 | * Register an RPC service with the local portmapper. |
@@ -737,7 +763,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) | |||
737 | { | 763 | { |
738 | struct svc_program *progp; | 764 | struct svc_program *progp; |
739 | unsigned long flags; | 765 | unsigned long flags; |
740 | int i, error = 0, dummy; | 766 | unsigned int i; |
767 | int error = 0, dummy; | ||
741 | 768 | ||
742 | if (!port) | 769 | if (!port) |
743 | clear_thread_flag(TIF_SIGPENDING); | 770 | clear_thread_flag(TIF_SIGPENDING); |
@@ -840,9 +867,9 @@ svc_process(struct svc_rqst *rqstp) | |||
840 | rqstp->rq_res.tail[0].iov_len = 0; | 867 | rqstp->rq_res.tail[0].iov_len = 0; |
841 | /* Will be turned off only in gss privacy case: */ | 868 | /* Will be turned off only in gss privacy case: */ |
842 | rqstp->rq_splice_ok = 1; | 869 | rqstp->rq_splice_ok = 1; |
843 | /* tcp needs a space for the record length... */ | 870 | |
844 | if (rqstp->rq_prot == IPPROTO_TCP) | 871 | /* Setup reply header */ |
845 | svc_putnl(resv, 0); | 872 | rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); |
846 | 873 | ||
847 | rqstp->rq_xid = svc_getu32(argv); | 874 | rqstp->rq_xid = svc_getu32(argv); |
848 | svc_putu32(resv, rqstp->rq_xid); | 875 | svc_putu32(resv, rqstp->rq_xid); |
@@ -1049,16 +1076,15 @@ err_bad: | |||
1049 | svc_putnl(resv, ntohl(rpc_stat)); | 1076 | svc_putnl(resv, ntohl(rpc_stat)); |
1050 | goto sendit; | 1077 | goto sendit; |
1051 | } | 1078 | } |
1079 | EXPORT_SYMBOL(svc_process); | ||
1052 | 1080 | ||
1053 | /* | 1081 | /* |
1054 | * Return (transport-specific) limit on the rpc payload. | 1082 | * Return (transport-specific) limit on the rpc payload. |
1055 | */ | 1083 | */ |
1056 | u32 svc_max_payload(const struct svc_rqst *rqstp) | 1084 | u32 svc_max_payload(const struct svc_rqst *rqstp) |
1057 | { | 1085 | { |
1058 | int max = RPCSVC_MAXPAYLOAD_TCP; | 1086 | u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload; |
1059 | 1087 | ||
1060 | if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM) | ||
1061 | max = RPCSVC_MAXPAYLOAD_UDP; | ||
1062 | if (rqstp->rq_server->sv_max_payload < max) | 1088 | if (rqstp->rq_server->sv_max_payload < max) |
1063 | max = rqstp->rq_server->sv_max_payload; | 1089 | max = rqstp->rq_server->sv_max_payload; |
1064 | return max; | 1090 | return max; |
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c new file mode 100644 index 000000000000..ea377e06afae --- /dev/null +++ b/net/sunrpc/svc_xprt.c | |||
@@ -0,0 +1,1055 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/svc_xprt.c | ||
3 | * | ||
4 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/sched.h> | ||
8 | #include <linux/errno.h> | ||
9 | #include <linux/fcntl.h> | ||
10 | #include <linux/net.h> | ||
11 | #include <linux/in.h> | ||
12 | #include <linux/inet.h> | ||
13 | #include <linux/udp.h> | ||
14 | #include <linux/tcp.h> | ||
15 | #include <linux/unistd.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <linux/netdevice.h> | ||
18 | #include <linux/skbuff.h> | ||
19 | #include <linux/file.h> | ||
20 | #include <linux/freezer.h> | ||
21 | #include <net/sock.h> | ||
22 | #include <net/checksum.h> | ||
23 | #include <net/ip.h> | ||
24 | #include <net/ipv6.h> | ||
25 | #include <net/tcp_states.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | #include <asm/ioctls.h> | ||
28 | |||
29 | #include <linux/sunrpc/types.h> | ||
30 | #include <linux/sunrpc/clnt.h> | ||
31 | #include <linux/sunrpc/xdr.h> | ||
32 | #include <linux/sunrpc/stats.h> | ||
33 | #include <linux/sunrpc/svc_xprt.h> | ||
34 | |||
35 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
36 | |||
37 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); | ||
38 | static int svc_deferred_recv(struct svc_rqst *rqstp); | ||
39 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | ||
40 | static void svc_age_temp_xprts(unsigned long closure); | ||
41 | |||
42 | /* apparently the "standard" is that clients close | ||
43 | * idle connections after 5 minutes, servers after | ||
44 | * 6 minutes | ||
45 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
46 | */ | ||
47 | static int svc_conn_age_period = 6*60; | ||
48 | |||
49 | /* List of registered transport classes */ | ||
50 | static DEFINE_SPINLOCK(svc_xprt_class_lock); | ||
51 | static LIST_HEAD(svc_xprt_class_list); | ||
52 | |||
53 | /* SMP locking strategy: | ||
54 | * | ||
55 | * svc_pool->sp_lock protects most of the fields of that pool. | ||
56 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
57 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
58 | * BKL protects svc_serv->sv_nrthread. | ||
59 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | ||
60 | * and the ->sk_info_authunix cache. | ||
61 | * | ||
62 | * The XPT_BUSY bit in xprt->xpt_flags prevents a transport being | ||
63 | * enqueued multiply. During normal transport processing this bit | ||
64 | * is set by svc_xprt_enqueue and cleared by svc_xprt_received. | ||
65 | * Providers should not manipulate this bit directly. | ||
66 | * | ||
67 | * Some flags can be set to certain values at any time | ||
68 | * providing that certain rules are followed: | ||
69 | * | ||
70 | * XPT_CONN, XPT_DATA: | ||
71 | * - Can be set or cleared at any time. | ||
72 | * - After a set, svc_xprt_enqueue must be called to enqueue | ||
73 | * the transport for processing. | ||
74 | * - After a clear, the transport must be read/accepted. | ||
75 | * If this succeeds, it must be set again. | ||
76 | * XPT_CLOSE: | ||
77 | * - Can be set at any time. It is never cleared. | |
78 | * XPT_DEAD: | ||
79 | * - Can only be set while XPT_BUSY is held which ensures | ||
80 | * that no other thread will be using the transport or will | ||
81 | * try to set XPT_DEAD. | ||
82 | */ | ||
83 | |||
84 | int svc_reg_xprt_class(struct svc_xprt_class *xcl) | ||
85 | { | ||
86 | struct svc_xprt_class *cl; | ||
87 | int res = -EEXIST; | ||
88 | |||
89 | dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name); | ||
90 | |||
91 | INIT_LIST_HEAD(&xcl->xcl_list); | ||
92 | spin_lock(&svc_xprt_class_lock); | ||
93 | /* Make sure there isn't already a class with the same name */ | ||
94 | list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) { | ||
95 | if (strcmp(xcl->xcl_name, cl->xcl_name) == 0) | ||
96 | goto out; | ||
97 | } | ||
98 | list_add_tail(&xcl->xcl_list, &svc_xprt_class_list); | ||
99 | res = 0; | ||
100 | out: | ||
101 | spin_unlock(&svc_xprt_class_lock); | ||
102 | return res; | ||
103 | } | ||
104 | EXPORT_SYMBOL_GPL(svc_reg_xprt_class); | ||
105 | |||
106 | void svc_unreg_xprt_class(struct svc_xprt_class *xcl) | ||
107 | { | ||
108 | dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name); | ||
109 | spin_lock(&svc_xprt_class_lock); | ||
110 | list_del_init(&xcl->xcl_list); | ||
111 | spin_unlock(&svc_xprt_class_lock); | ||
112 | } | ||
113 | EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); | ||
114 | |||
115 | /* | ||
116 | * Format the transport list for printing | ||
117 | */ | ||
118 | int svc_print_xprts(char *buf, int maxlen) | ||
119 | { | ||
120 | struct list_head *le; | ||
121 | char tmpstr[80]; | ||
122 | int len = 0; | ||
123 | buf[0] = '\0'; | ||
124 | |||
125 | spin_lock(&svc_xprt_class_lock); | ||
126 | list_for_each(le, &svc_xprt_class_list) { | ||
127 | int slen; | ||
128 | struct svc_xprt_class *xcl = | ||
129 | list_entry(le, struct svc_xprt_class, xcl_list); | ||
130 | |||
131 | sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); | ||
132 | slen = strlen(tmpstr); | ||
133 | if (len + slen > maxlen) | ||
134 | break; | ||
135 | len += slen; | ||
136 | strcat(buf, tmpstr); | ||
137 | } | ||
138 | spin_unlock(&svc_xprt_class_lock); | ||
139 | |||
140 | return len; | ||
141 | } | ||
142 | |||
143 | static void svc_xprt_free(struct kref *kref) | ||
144 | { | ||
145 | struct svc_xprt *xprt = | ||
146 | container_of(kref, struct svc_xprt, xpt_ref); | ||
147 | struct module *owner = xprt->xpt_class->xcl_owner; | ||
148 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) | ||
149 | && xprt->xpt_auth_cache != NULL) | ||
150 | svcauth_unix_info_release(xprt->xpt_auth_cache); | ||
151 | xprt->xpt_ops->xpo_free(xprt); | ||
152 | module_put(owner); | ||
153 | } | ||
154 | |||
155 | void svc_xprt_put(struct svc_xprt *xprt) | ||
156 | { | ||
157 | kref_put(&xprt->xpt_ref, svc_xprt_free); | ||
158 | } | ||
159 | EXPORT_SYMBOL_GPL(svc_xprt_put); | ||
160 | |||
161 | /* | ||
162 | * Called by transport drivers to initialize the transport independent | ||
163 | * portion of the transport instance. | ||
164 | */ | ||
165 | void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, | ||
166 | struct svc_serv *serv) | ||
167 | { | ||
168 | memset(xprt, 0, sizeof(*xprt)); | ||
169 | xprt->xpt_class = xcl; | ||
170 | xprt->xpt_ops = xcl->xcl_ops; | ||
171 | kref_init(&xprt->xpt_ref); | ||
172 | xprt->xpt_server = serv; | ||
173 | INIT_LIST_HEAD(&xprt->xpt_list); | ||
174 | INIT_LIST_HEAD(&xprt->xpt_ready); | ||
175 | INIT_LIST_HEAD(&xprt->xpt_deferred); | ||
176 | mutex_init(&xprt->xpt_mutex); | ||
177 | spin_lock_init(&xprt->xpt_lock); | ||
178 | set_bit(XPT_BUSY, &xprt->xpt_flags); | ||
179 | } | ||
180 | EXPORT_SYMBOL_GPL(svc_xprt_init); | ||
181 | |||
182 | int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | ||
183 | int flags) | ||
184 | { | ||
185 | struct svc_xprt_class *xcl; | ||
186 | struct sockaddr_in sin = { | ||
187 | .sin_family = AF_INET, | ||
188 | .sin_addr.s_addr = INADDR_ANY, | ||
189 | .sin_port = htons(port), | ||
190 | }; | ||
191 | dprintk("svc: creating transport %s[%d]\n", xprt_name, port); | ||
192 | spin_lock(&svc_xprt_class_lock); | ||
193 | list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { | ||
194 | struct svc_xprt *newxprt; | ||
195 | |||
196 | if (strcmp(xprt_name, xcl->xcl_name)) | ||
197 | continue; | ||
198 | |||
199 | if (!try_module_get(xcl->xcl_owner)) | ||
200 | goto err; | ||
201 | |||
202 | spin_unlock(&svc_xprt_class_lock); | ||
203 | newxprt = xcl->xcl_ops-> | ||
204 | xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin), | ||
205 | flags); | ||
206 | if (IS_ERR(newxprt)) { | ||
207 | module_put(xcl->xcl_owner); | ||
208 | return PTR_ERR(newxprt); | ||
209 | } | ||
210 | |||
211 | clear_bit(XPT_TEMP, &newxprt->xpt_flags); | ||
212 | spin_lock_bh(&serv->sv_lock); | ||
213 | list_add(&newxprt->xpt_list, &serv->sv_permsocks); | ||
214 | spin_unlock_bh(&serv->sv_lock); | ||
215 | clear_bit(XPT_BUSY, &newxprt->xpt_flags); | ||
216 | return svc_xprt_local_port(newxprt); | ||
217 | } | ||
218 | err: | ||
219 | spin_unlock(&svc_xprt_class_lock); | ||
220 | dprintk("svc: transport %s not found\n", xprt_name); | ||
221 | return -ENOENT; | ||
222 | } | ||
223 | EXPORT_SYMBOL_GPL(svc_create_xprt); | ||
224 | |||
225 | /* | ||
226 | * Copy the local and remote xprt addresses to the rqstp structure | ||
227 | */ | ||
228 | void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt) | ||
229 | { | ||
230 | struct sockaddr *sin; | ||
231 | |||
232 | memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen); | ||
233 | rqstp->rq_addrlen = xprt->xpt_remotelen; | ||
234 | |||
235 | /* | ||
236 | * Destination address in request is needed for binding the | ||
237 | * source address in RPC replies/callbacks later. | ||
238 | */ | ||
239 | sin = (struct sockaddr *)&xprt->xpt_local; | ||
240 | switch (sin->sa_family) { | ||
241 | case AF_INET: | ||
242 | rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; | ||
243 | break; | ||
244 | case AF_INET6: | ||
245 | rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; | ||
246 | break; | ||
247 | } | ||
248 | } | ||
249 | EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs); | ||
250 | |||
251 | /** | ||
252 | * svc_print_addr - Format rq_addr field for printing | ||
253 | * @rqstp: svc_rqst struct containing address to print | ||
254 | * @buf: target buffer for formatted address | ||
255 | * @len: length of target buffer | ||
256 | * | ||
257 | */ | ||
258 | char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) | ||
259 | { | ||
260 | return __svc_print_addr(svc_addr(rqstp), buf, len); | ||
261 | } | ||
262 | EXPORT_SYMBOL_GPL(svc_print_addr); | ||
263 | |||
264 | /* | ||
265 | * Queue up an idle server thread. Must have pool->sp_lock held. | ||
266 | * Note: this is really a stack rather than a queue, so that we only | ||
267 | * use as many different threads as we need, and the rest don't pollute | ||
268 | * the cache. | ||
269 | */ | ||
270 | static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
271 | { | ||
272 | list_add(&rqstp->rq_list, &pool->sp_threads); | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | ||
277 | */ | ||
278 | static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
279 | { | ||
280 | list_del(&rqstp->rq_list); | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * Queue up a transport with data pending. If there are idle nfsd | ||
285 | * processes, wake 'em up. | ||
286 | * | ||
287 | */ | ||
288 | void svc_xprt_enqueue(struct svc_xprt *xprt) | ||
289 | { | ||
290 | struct svc_serv *serv = xprt->xpt_server; | ||
291 | struct svc_pool *pool; | ||
292 | struct svc_rqst *rqstp; | ||
293 | int cpu; | ||
294 | |||
295 | if (!(xprt->xpt_flags & | ||
296 | ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) | ||
297 | return; | ||
298 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) | ||
299 | return; | ||
300 | |||
301 | cpu = get_cpu(); | ||
302 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | ||
303 | put_cpu(); | ||
304 | |||
305 | spin_lock_bh(&pool->sp_lock); | ||
306 | |||
307 | if (!list_empty(&pool->sp_threads) && | ||
308 | !list_empty(&pool->sp_sockets)) | ||
309 | printk(KERN_ERR | ||
310 | "svc_xprt_enqueue: " | ||
311 | "threads and transports both waiting??\n"); | ||
312 | |||
313 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { | ||
314 | /* Don't enqueue dead transports */ | ||
315 | dprintk("svc: transport %p is dead, not enqueued\n", xprt); | ||
316 | goto out_unlock; | ||
317 | } | ||
318 | |||
319 | /* Mark transport as busy. It will remain in this state until | ||
320 | * the provider calls svc_xprt_received. We update XPT_BUSY | ||
321 | * atomically because it also guards against trying to enqueue | ||
322 | * the transport twice. | ||
323 | */ | ||
324 | if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { | ||
325 | /* Don't enqueue transport while already enqueued */ | ||
326 | dprintk("svc: transport %p busy, not enqueued\n", xprt); | ||
327 | goto out_unlock; | ||
328 | } | ||
329 | BUG_ON(xprt->xpt_pool != NULL); | ||
330 | xprt->xpt_pool = pool; | ||
331 | |||
332 | /* Handle pending connection */ | ||
333 | if (test_bit(XPT_CONN, &xprt->xpt_flags)) | ||
334 | goto process; | ||
335 | |||
336 | /* Handle close in-progress */ | ||
337 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | ||
338 | goto process; | ||
339 | |||
340 | /* Check if we have space to reply to a request */ | ||
341 | if (!xprt->xpt_ops->xpo_has_wspace(xprt)) { | ||
342 | /* Don't enqueue while not enough space for reply */ | ||
343 | dprintk("svc: no write space, transport %p not enqueued\n", | ||
344 | xprt); | ||
345 | xprt->xpt_pool = NULL; | ||
346 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
347 | goto out_unlock; | ||
348 | } | ||
349 | |||
350 | process: | ||
351 | if (!list_empty(&pool->sp_threads)) { | ||
352 | rqstp = list_entry(pool->sp_threads.next, | ||
353 | struct svc_rqst, | ||
354 | rq_list); | ||
355 | dprintk("svc: transport %p served by daemon %p\n", | ||
356 | xprt, rqstp); | ||
357 | svc_thread_dequeue(pool, rqstp); | ||
358 | if (rqstp->rq_xprt) | ||
359 | printk(KERN_ERR | ||
360 | "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", | ||
361 | rqstp, rqstp->rq_xprt); | ||
362 | rqstp->rq_xprt = xprt; | ||
363 | svc_xprt_get(xprt); | ||
364 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
365 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
366 | BUG_ON(xprt->xpt_pool != pool); | ||
367 | wake_up(&rqstp->rq_wait); | ||
368 | } else { | ||
369 | dprintk("svc: transport %p put into queue\n", xprt); | ||
370 | list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); | ||
371 | BUG_ON(xprt->xpt_pool != pool); | ||
372 | } | ||
373 | |||
374 | out_unlock: | ||
375 | spin_unlock_bh(&pool->sp_lock); | ||
376 | } | ||
377 | EXPORT_SYMBOL_GPL(svc_xprt_enqueue); | ||
378 | |||
379 | /* | ||
380 | * Dequeue the first transport. Must be called with the pool->sp_lock held. | ||
381 | */ | ||
382 | static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) | ||
383 | { | ||
384 | struct svc_xprt *xprt; | ||
385 | |||
386 | if (list_empty(&pool->sp_sockets)) | ||
387 | return NULL; | ||
388 | |||
389 | xprt = list_entry(pool->sp_sockets.next, | ||
390 | struct svc_xprt, xpt_ready); | ||
391 | list_del_init(&xprt->xpt_ready); | ||
392 | |||
393 | dprintk("svc: transport %p dequeued, inuse=%d\n", | ||
394 | xprt, atomic_read(&xprt->xpt_ref.refcount)); | ||
395 | |||
396 | return xprt; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * svc_xprt_received conditionally queues the transport for processing | ||
401 | * by another thread. The caller must hold the XPT_BUSY bit and must | ||
402 | * not thereafter touch transport data. | ||
403 | * | ||
404 | * Note: XPT_DATA only gets cleared when a read-attempt finds no (or | ||
405 | * insufficient) data. | ||
406 | */ | ||
407 | void svc_xprt_received(struct svc_xprt *xprt) | ||
408 | { | ||
409 | BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); | ||
410 | xprt->xpt_pool = NULL; | ||
411 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
412 | svc_xprt_enqueue(xprt); | ||
413 | } | ||
414 | EXPORT_SYMBOL_GPL(svc_xprt_received); | ||
415 | |||
416 | /** | ||
417 | * svc_reserve - change the space reserved for the reply to a request. | ||
418 | * @rqstp: The request in question | ||
419 | * @space: new max space to reserve | ||
420 | * | ||
421 | * Each request reserves some space on the output queue of the transport | ||
422 | * to make sure the reply fits. This function reduces that reserved | ||
423 | * space to be the amount of space used already, plus @space. | ||
424 | * | ||
425 | */ | ||
426 | void svc_reserve(struct svc_rqst *rqstp, int space) | ||
427 | { | ||
428 | space += rqstp->rq_res.head[0].iov_len; | ||
429 | |||
430 | if (space < rqstp->rq_reserved) { | ||
431 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
432 | atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); | ||
433 | rqstp->rq_reserved = space; | ||
434 | |||
435 | svc_xprt_enqueue(xprt); | ||
436 | } | ||
437 | } | ||
438 | EXPORT_SYMBOL(svc_reserve); | ||
439 | |||
440 | static void svc_xprt_release(struct svc_rqst *rqstp) | ||
441 | { | ||
442 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
443 | |||
444 | rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); | ||
445 | |||
446 | svc_free_res_pages(rqstp); | ||
447 | rqstp->rq_res.page_len = 0; | ||
448 | rqstp->rq_res.page_base = 0; | ||
449 | |||
450 | /* Reset response buffer and release | ||
451 | * the reservation. | ||
452 | * But first, check that enough space was reserved | ||
453 | * for the reply, otherwise we have a bug! | ||
454 | */ | ||
455 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | ||
456 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | ||
457 | rqstp->rq_reserved, | ||
458 | rqstp->rq_res.len); | ||
459 | |||
460 | rqstp->rq_res.head[0].iov_len = 0; | ||
461 | svc_reserve(rqstp, 0); | ||
462 | rqstp->rq_xprt = NULL; | ||
463 | |||
464 | svc_xprt_put(xprt); | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * External function to wake up a server waiting for data | ||
469 | * This really only makes sense for services like lockd | ||
470 | * which have exactly one thread anyway. | ||
471 | */ | ||
472 | void svc_wake_up(struct svc_serv *serv) | ||
473 | { | ||
474 | struct svc_rqst *rqstp; | ||
475 | unsigned int i; | ||
476 | struct svc_pool *pool; | ||
477 | |||
478 | for (i = 0; i < serv->sv_nrpools; i++) { | ||
479 | pool = &serv->sv_pools[i]; | ||
480 | |||
481 | spin_lock_bh(&pool->sp_lock); | ||
482 | if (!list_empty(&pool->sp_threads)) { | ||
483 | rqstp = list_entry(pool->sp_threads.next, | ||
484 | struct svc_rqst, | ||
485 | rq_list); | ||
486 | dprintk("svc: daemon %p woken up.\n", rqstp); | ||
487 | /* | ||
488 | svc_thread_dequeue(pool, rqstp); | ||
489 | rqstp->rq_xprt = NULL; | ||
490 | */ | ||
491 | wake_up(&rqstp->rq_wait); | ||
492 | } | ||
493 | spin_unlock_bh(&pool->sp_lock); | ||
494 | } | ||
495 | } | ||
496 | EXPORT_SYMBOL(svc_wake_up); | ||
497 | |||
498 | int svc_port_is_privileged(struct sockaddr *sin) | ||
499 | { | ||
500 | switch (sin->sa_family) { | ||
501 | case AF_INET: | ||
502 | return ntohs(((struct sockaddr_in *)sin)->sin_port) | ||
503 | < PROT_SOCK; | ||
504 | case AF_INET6: | ||
505 | return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) | ||
506 | < PROT_SOCK; | ||
507 | default: | ||
508 | return 0; | ||
509 | } | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * Make sure that we don't have too many active connections. If we | ||
514 | * have, something must be dropped. | ||
515 | * | ||
516 | * There's no point in trying to do random drop here for DoS | ||
517 | * prevention. The NFS clients does 1 reconnect in 15 seconds. An | ||
518 | * attacker can easily beat that. | ||
519 | * | ||
520 | * The only somewhat efficient mechanism would be if drop old | ||
521 | * connections from the same IP first. But right now we don't even | ||
522 | * record the client IP in svc_sock. | ||
523 | */ | ||
524 | static void svc_check_conn_limits(struct svc_serv *serv) | ||
525 | { | ||
526 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | ||
527 | struct svc_xprt *xprt = NULL; | ||
528 | spin_lock_bh(&serv->sv_lock); | ||
529 | if (!list_empty(&serv->sv_tempsocks)) { | ||
530 | if (net_ratelimit()) { | ||
531 | /* Try to help the admin */ | ||
532 | printk(KERN_NOTICE "%s: too many open " | ||
533 | "connections, consider increasing the " | ||
534 | "number of nfsd threads\n", | ||
535 | serv->sv_name); | ||
536 | } | ||
537 | /* | ||
538 | * Always select the oldest connection. It's not fair, | ||
539 | * but so is life | ||
540 | */ | ||
541 | xprt = list_entry(serv->sv_tempsocks.prev, | ||
542 | struct svc_xprt, | ||
543 | xpt_list); | ||
544 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
545 | svc_xprt_get(xprt); | ||
546 | } | ||
547 | spin_unlock_bh(&serv->sv_lock); | ||
548 | |||
549 | if (xprt) { | ||
550 | svc_xprt_enqueue(xprt); | ||
551 | svc_xprt_put(xprt); | ||
552 | } | ||
553 | } | ||
554 | } | ||
555 | |||
556 | /* | ||
557 | * Receive the next request on any transport. This code is carefully | ||
558 | * organised not to touch any cachelines in the shared svc_serv | ||
559 | * structure, only cachelines in the local svc_pool. | ||
560 | */ | ||
561 | int svc_recv(struct svc_rqst *rqstp, long timeout) | ||
562 | { | ||
563 | struct svc_xprt *xprt = NULL; | ||
564 | struct svc_serv *serv = rqstp->rq_server; | ||
565 | struct svc_pool *pool = rqstp->rq_pool; | ||
566 | int len, i; | ||
567 | int pages; | ||
568 | struct xdr_buf *arg; | ||
569 | DECLARE_WAITQUEUE(wait, current); | ||
570 | |||
571 | dprintk("svc: server %p waiting for data (to = %ld)\n", | ||
572 | rqstp, timeout); | ||
573 | |||
574 | if (rqstp->rq_xprt) | ||
575 | printk(KERN_ERR | ||
576 | "svc_recv: service %p, transport not NULL!\n", | ||
577 | rqstp); | ||
578 | if (waitqueue_active(&rqstp->rq_wait)) | ||
579 | printk(KERN_ERR | ||
580 | "svc_recv: service %p, wait queue active!\n", | ||
581 | rqstp); | ||
582 | |||
583 | /* now allocate needed pages. If we get a failure, sleep briefly */ | ||
584 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | ||
585 | for (i = 0; i < pages ; i++) | ||
586 | while (rqstp->rq_pages[i] == NULL) { | ||
587 | struct page *p = alloc_page(GFP_KERNEL); | ||
588 | if (!p) { | ||
589 | int j = msecs_to_jiffies(500); | ||
590 | schedule_timeout_uninterruptible(j); | ||
591 | } | ||
592 | rqstp->rq_pages[i] = p; | ||
593 | } | ||
594 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | ||
595 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
596 | |||
597 | /* Make arg->head point to first page and arg->pages point to rest */ | ||
598 | arg = &rqstp->rq_arg; | ||
599 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | ||
600 | arg->head[0].iov_len = PAGE_SIZE; | ||
601 | arg->pages = rqstp->rq_pages + 1; | ||
602 | arg->page_base = 0; | ||
603 | /* save at least one page for response */ | ||
604 | arg->page_len = (pages-2)*PAGE_SIZE; | ||
605 | arg->len = (pages-1)*PAGE_SIZE; | ||
606 | arg->tail[0].iov_len = 0; | ||
607 | |||
608 | try_to_freeze(); | ||
609 | cond_resched(); | ||
610 | if (signalled()) | ||
611 | return -EINTR; | ||
612 | |||
613 | spin_lock_bh(&pool->sp_lock); | ||
614 | xprt = svc_xprt_dequeue(pool); | ||
615 | if (xprt) { | ||
616 | rqstp->rq_xprt = xprt; | ||
617 | svc_xprt_get(xprt); | ||
618 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
619 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
620 | } else { | ||
621 | /* No data pending. Go to sleep */ | ||
622 | svc_thread_enqueue(pool, rqstp); | ||
623 | |||
624 | /* | ||
625 | * We have to be able to interrupt this wait | ||
626 | * to bring down the daemons ... | ||
627 | */ | ||
628 | set_current_state(TASK_INTERRUPTIBLE); | ||
629 | add_wait_queue(&rqstp->rq_wait, &wait); | ||
630 | spin_unlock_bh(&pool->sp_lock); | ||
631 | |||
632 | schedule_timeout(timeout); | ||
633 | |||
634 | try_to_freeze(); | ||
635 | |||
636 | spin_lock_bh(&pool->sp_lock); | ||
637 | remove_wait_queue(&rqstp->rq_wait, &wait); | ||
638 | |||
639 | xprt = rqstp->rq_xprt; | ||
640 | if (!xprt) { | ||
641 | svc_thread_dequeue(pool, rqstp); | ||
642 | spin_unlock_bh(&pool->sp_lock); | ||
643 | dprintk("svc: server %p, no data yet\n", rqstp); | ||
644 | return signalled()? -EINTR : -EAGAIN; | ||
645 | } | ||
646 | } | ||
647 | spin_unlock_bh(&pool->sp_lock); | ||
648 | |||
649 | len = 0; | ||
650 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { | ||
651 | dprintk("svc_recv: found XPT_CLOSE\n"); | ||
652 | svc_delete_xprt(xprt); | ||
653 | } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { | ||
654 | struct svc_xprt *newxpt; | ||
655 | newxpt = xprt->xpt_ops->xpo_accept(xprt); | ||
656 | if (newxpt) { | ||
657 | /* | ||
658 | * We know this module_get will succeed because the | ||
659 | * listener holds a reference too | ||
660 | */ | ||
661 | __module_get(newxpt->xpt_class->xcl_owner); | ||
662 | svc_check_conn_limits(xprt->xpt_server); | ||
663 | spin_lock_bh(&serv->sv_lock); | ||
664 | set_bit(XPT_TEMP, &newxpt->xpt_flags); | ||
665 | list_add(&newxpt->xpt_list, &serv->sv_tempsocks); | ||
666 | serv->sv_tmpcnt++; | ||
667 | if (serv->sv_temptimer.function == NULL) { | ||
668 | /* setup timer to age temp transports */ | ||
669 | setup_timer(&serv->sv_temptimer, | ||
670 | svc_age_temp_xprts, | ||
671 | (unsigned long)serv); | ||
672 | mod_timer(&serv->sv_temptimer, | ||
673 | jiffies + svc_conn_age_period * HZ); | ||
674 | } | ||
675 | spin_unlock_bh(&serv->sv_lock); | ||
676 | svc_xprt_received(newxpt); | ||
677 | } | ||
678 | svc_xprt_received(xprt); | ||
679 | } else { | ||
680 | dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", | ||
681 | rqstp, pool->sp_id, xprt, | ||
682 | atomic_read(&xprt->xpt_ref.refcount)); | ||
683 | rqstp->rq_deferred = svc_deferred_dequeue(xprt); | ||
684 | if (rqstp->rq_deferred) { | ||
685 | svc_xprt_received(xprt); | ||
686 | len = svc_deferred_recv(rqstp); | ||
687 | } else | ||
688 | len = xprt->xpt_ops->xpo_recvfrom(rqstp); | ||
689 | dprintk("svc: got len=%d\n", len); | ||
690 | } | ||
691 | |||
692 | /* No data, incomplete (TCP) read, or accept() */ | ||
693 | if (len == 0 || len == -EAGAIN) { | ||
694 | rqstp->rq_res.len = 0; | ||
695 | svc_xprt_release(rqstp); | ||
696 | return -EAGAIN; | ||
697 | } | ||
698 | clear_bit(XPT_OLD, &xprt->xpt_flags); | ||
699 | |||
700 | rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); | ||
701 | rqstp->rq_chandle.defer = svc_defer; | ||
702 | |||
703 | if (serv->sv_stats) | ||
704 | serv->sv_stats->netcnt++; | ||
705 | return len; | ||
706 | } | ||
707 | EXPORT_SYMBOL(svc_recv); | ||
708 | |||
709 | /* | ||
710 | * Drop request | ||
711 | */ | ||
712 | void svc_drop(struct svc_rqst *rqstp) | ||
713 | { | ||
714 | dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); | ||
715 | svc_xprt_release(rqstp); | ||
716 | } | ||
717 | EXPORT_SYMBOL(svc_drop); | ||
718 | |||
/*
 * Return reply to client.
 *
 * Returns -EFAULT when the request has no transport attached.
 * Otherwise returns the result of the transport's xpo_sendto method,
 * except that -ECONNREFUSED, -ENOTCONN and -EAGAIN are reported as 0.
 * A transport already marked XPT_DEAD is not sent on at all and is
 * treated as -ENOTCONN.
 */
int svc_send(struct svc_rqst *rqstp)
{
	struct svc_xprt *xprt = rqstp->rq_xprt;
	struct xdr_buf *res;
	int sent;

	if (xprt == NULL)
		return -EFAULT;

	/* release the receive skb before sending the reply */
	xprt->xpt_ops->xpo_release_rqst(rqstp);

	/* calculate over-all length */
	res = &rqstp->rq_res;
	res->len = res->head[0].iov_len +
		   res->page_len +
		   res->tail[0].iov_len;

	/* Grab mutex to serialize outgoing data. */
	mutex_lock(&xprt->xpt_mutex);
	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
		sent = -ENOTCONN;
	else
		sent = xprt->xpt_ops->xpo_sendto(rqstp);
	mutex_unlock(&xprt->xpt_mutex);
	svc_xprt_release(rqstp);

	switch (sent) {
	case -ECONNREFUSED:
	case -ENOTCONN:
	case -EAGAIN:
		return 0;
	default:
		return sent;
	}
}
754 | |||
755 | /* | ||
756 | * Timer function to close old temporary transports, using | ||
757 | * a mark-and-sweep algorithm. | ||
758 | */ | ||
759 | static void svc_age_temp_xprts(unsigned long closure) | ||
760 | { | ||
761 | struct svc_serv *serv = (struct svc_serv *)closure; | ||
762 | struct svc_xprt *xprt; | ||
763 | struct list_head *le, *next; | ||
764 | LIST_HEAD(to_be_aged); | ||
765 | |||
766 | dprintk("svc_age_temp_xprts\n"); | ||
767 | |||
768 | if (!spin_trylock_bh(&serv->sv_lock)) { | ||
769 | /* busy, try again 1 sec later */ | ||
770 | dprintk("svc_age_temp_xprts: busy\n"); | ||
771 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | ||
772 | return; | ||
773 | } | ||
774 | |||
775 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | ||
776 | xprt = list_entry(le, struct svc_xprt, xpt_list); | ||
777 | |||
778 | /* First time through, just mark it OLD. Second time | ||
779 | * through, close it. */ | ||
780 | if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags)) | ||
781 | continue; | ||
782 | if (atomic_read(&xprt->xpt_ref.refcount) > 1 | ||
783 | || test_bit(XPT_BUSY, &xprt->xpt_flags)) | ||
784 | continue; | ||
785 | svc_xprt_get(xprt); | ||
786 | list_move(le, &to_be_aged); | ||
787 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
788 | set_bit(XPT_DETACHED, &xprt->xpt_flags); | ||
789 | } | ||
790 | spin_unlock_bh(&serv->sv_lock); | ||
791 | |||
792 | while (!list_empty(&to_be_aged)) { | ||
793 | le = to_be_aged.next; | ||
794 | /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ | ||
795 | list_del_init(le); | ||
796 | xprt = list_entry(le, struct svc_xprt, xpt_list); | ||
797 | |||
798 | dprintk("queuing xprt %p for closing\n", xprt); | ||
799 | |||
800 | /* a thread will dequeue and close it soon */ | ||
801 | svc_xprt_enqueue(xprt); | ||
802 | svc_xprt_put(xprt); | ||
803 | } | ||
804 | |||
805 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | ||
806 | } | ||
807 | |||
808 | /* | ||
809 | * Remove a dead transport | ||
810 | */ | ||
811 | void svc_delete_xprt(struct svc_xprt *xprt) | ||
812 | { | ||
813 | struct svc_serv *serv = xprt->xpt_server; | ||
814 | |||
815 | dprintk("svc: svc_delete_xprt(%p)\n", xprt); | ||
816 | xprt->xpt_ops->xpo_detach(xprt); | ||
817 | |||
818 | spin_lock_bh(&serv->sv_lock); | ||
819 | if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) | ||
820 | list_del_init(&xprt->xpt_list); | ||
821 | /* | ||
822 | * We used to delete the transport from whichever list | ||
823 | * it's sk_xprt.xpt_ready node was on, but we don't actually | ||
824 | * need to. This is because the only time we're called | ||
825 | * while still attached to a queue, the queue itself | ||
826 | * is about to be destroyed (in svc_destroy). | ||
827 | */ | ||
828 | if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) { | ||
829 | BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2); | ||
830 | if (test_bit(XPT_TEMP, &xprt->xpt_flags)) | ||
831 | serv->sv_tmpcnt--; | ||
832 | svc_xprt_put(xprt); | ||
833 | } | ||
834 | spin_unlock_bh(&serv->sv_lock); | ||
835 | } | ||
836 | |||
837 | void svc_close_xprt(struct svc_xprt *xprt) | ||
838 | { | ||
839 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
840 | if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) | ||
841 | /* someone else will have to effect the close */ | ||
842 | return; | ||
843 | |||
844 | svc_xprt_get(xprt); | ||
845 | svc_delete_xprt(xprt); | ||
846 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
847 | svc_xprt_put(xprt); | ||
848 | } | ||
849 | EXPORT_SYMBOL_GPL(svc_close_xprt); | ||
850 | |||
851 | void svc_close_all(struct list_head *xprt_list) | ||
852 | { | ||
853 | struct svc_xprt *xprt; | ||
854 | struct svc_xprt *tmp; | ||
855 | |||
856 | list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { | ||
857 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
858 | if (test_bit(XPT_BUSY, &xprt->xpt_flags)) { | ||
859 | /* Waiting to be processed, but no threads left, | ||
860 | * So just remove it from the waiting list | ||
861 | */ | ||
862 | list_del_init(&xprt->xpt_ready); | ||
863 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
864 | } | ||
865 | svc_close_xprt(xprt); | ||
866 | } | ||
867 | } | ||
868 | |||
869 | /* | ||
870 | * Handle defer and revisit of requests | ||
871 | */ | ||
872 | |||
873 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | ||
874 | { /* Revisit callback installed by svc_defer(): queue the deferred request on its transport, or discard it when too_many is set. */ | ||
875 | struct svc_deferred_req *dr = | ||
876 | container_of(dreq, struct svc_deferred_req, handle); | ||
877 | struct svc_xprt *xprt = dr->xprt; | ||
878 | |||
879 | if (too_many) { /* discard: drop the transport reference and free the request */ | ||
880 | svc_xprt_put(xprt); | ||
881 | kfree(dr); | ||
882 | return; | ||
883 | } | ||
884 | dprintk("revisit queued\n"); | ||
885 | dr->xprt = NULL; | ||
886 | spin_lock(&xprt->xpt_lock); /* xpt_deferred is modified under xpt_lock */ | ||
887 | list_add(&dr->handle.recent, &xprt->xpt_deferred); | ||
888 | spin_unlock(&xprt->xpt_lock); | ||
889 | set_bit(XPT_DEFERRED, &xprt->xpt_flags); /* tested by svc_deferred_dequeue() before it takes the lock */ | ||
890 | svc_xprt_enqueue(xprt); | ||
891 | svc_xprt_put(xprt); /* drop the reference taken in svc_defer() */ | ||
892 | } | ||
893 | |||
894 | /* | ||
895 | * Save the request off for later processing. The request buffer looks | ||
896 | * like this: | ||
897 | * | ||
898 | * <xprt-header><rpc-header><rpc-pagelist><rpc-tail> | ||
899 | * | ||
900 | * This code can only handle requests that consist of an xprt-header | ||
901 | * and rpc-header; it returns NULL if the request has page data or if the allocation fails. | ||
902 | */ | ||
903 | static struct cache_deferred_req *svc_defer(struct cache_req *req) | ||
904 | { | ||
905 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | ||
906 | struct svc_deferred_req *dr; | ||
907 | |||
908 | if (rqstp->rq_arg.page_len) | ||
909 | return NULL; /* if more than a page, give up FIXME */ | ||
910 | if (rqstp->rq_deferred) { /* reuse the deferred request already attached to rqstp */ | ||
911 | dr = rqstp->rq_deferred; | ||
912 | rqstp->rq_deferred = NULL; | ||
913 | } else { | ||
914 | size_t skip; | ||
915 | size_t size; | ||
916 | /* FIXME maybe discard if size too large */ | ||
917 | size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len; /* header plus rq_arg.len bytes of saved arguments */ | ||
918 | dr = kmalloc(size, GFP_KERNEL); | ||
919 | if (dr == NULL) | ||
920 | return NULL; | ||
921 | |||
922 | dr->handle.owner = rqstp->rq_server; | ||
923 | dr->prot = rqstp->rq_prot; | ||
924 | memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); | ||
925 | dr->addrlen = rqstp->rq_addrlen; | ||
926 | dr->daddr = rqstp->rq_daddr; | ||
927 | dr->argslen = rqstp->rq_arg.len >> 2; /* stored in 4-byte units; shifted back with << 2 when used */ | ||
928 | dr->xprt_hlen = rqstp->rq_xprt_hlen; | ||
929 | |||
930 | /* back up head to the start of the buffer and copy */ | ||
931 | skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | ||
932 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip, | ||
933 | dr->argslen << 2); | ||
934 | } | ||
935 | svc_xprt_get(rqstp->rq_xprt); /* reference stored in dr->xprt; released in svc_revisit() */ | ||
936 | dr->xprt = rqstp->rq_xprt; | ||
937 | |||
938 | dr->handle.revisit = svc_revisit; | ||
939 | return &dr->handle; | ||
940 | } | ||
941 | |||
942 | /* | ||
943 | * recv data from a deferred request (rqstp->rq_deferred) into an active one; returns the data length excluding the transport header | ||
944 | */ | ||
945 | static int svc_deferred_recv(struct svc_rqst *rqstp) | ||
946 | { | ||
947 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
948 | |||
949 | /* setup iov_base past transport header (xprt_hlen is in bytes; the >>2 presumably converts it to 32-bit words of dr->args -- confirm) */ | ||
950 | rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2); | ||
951 | /* The iov_len does not include the transport header bytes */ | ||
952 | rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen; | ||
953 | rqstp->rq_arg.page_len = 0; | ||
954 | /* The rq_arg.len includes the transport header bytes */ | ||
955 | rqstp->rq_arg.len = dr->argslen<<2; | ||
956 | rqstp->rq_prot = dr->prot; | ||
957 | memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); | ||
958 | rqstp->rq_addrlen = dr->addrlen; | ||
959 | /* Save off transport header len in case we get deferred again */ | ||
960 | rqstp->rq_xprt_hlen = dr->xprt_hlen; | ||
961 | rqstp->rq_daddr = dr->daddr; | ||
962 | rqstp->rq_respages = rqstp->rq_pages; /* NOTE(review): presumably because a deferred request carries no argument pages -- confirm */ | ||
963 | return (dr->argslen<<2) - dr->xprt_hlen; | ||
964 | } | ||
965 | |||
966 | |||
967 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) | ||
968 | { /* Remove and return the first request on xprt->xpt_deferred, or NULL if there is none. */ | ||
969 | struct svc_deferred_req *dr = NULL; | ||
970 | |||
971 | if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags)) /* flag is tested without taking xpt_lock */ | ||
972 | return NULL; | ||
973 | spin_lock(&xprt->xpt_lock); | ||
974 | clear_bit(XPT_DEFERRED, &xprt->xpt_flags); /* stays clear only if the list turns out to be empty */ | ||
975 | if (!list_empty(&xprt->xpt_deferred)) { | ||
976 | dr = list_entry(xprt->xpt_deferred.next, | ||
977 | struct svc_deferred_req, | ||
978 | handle.recent); | ||
979 | list_del_init(&dr->handle.recent); | ||
980 | set_bit(XPT_DEFERRED, &xprt->xpt_flags); /* re-set after every successful dequeue so a later call rechecks the list */ | ||
981 | } | ||
982 | spin_unlock(&xprt->xpt_lock); | ||
983 | return dr; | ||
984 | } | ||
985 | |||
986 | /* | ||
987 | * Return the transport instance pointer for the endpoint accepting | ||
988 | * connections/peer traffic from the specified transport class, | ||
989 | * address family and port. | ||
990 | * | ||
991 | * Specifying 0 for the address family or port is effectively a | ||
992 | * wild-card, and will result in matching the first transport in the | ||
993 | * service's list that has a matching class name. svc_xprt_get() is called on the returned transport; NULL is returned if nothing matches. | ||
994 | */ | ||
995 | struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, | ||
996 | int af, int port) | ||
997 | { | ||
998 | struct svc_xprt *xprt; | ||
999 | struct svc_xprt *found = NULL; | ||
1000 | |||
1001 | /* Sanity check the args */ | ||
1002 | if (!serv || !xcl_name) | ||
1003 | return found; | ||
1004 | |||
1005 | spin_lock_bh(&serv->sv_lock); | ||
1006 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { /* only the sv_permsocks list is searched */ | ||
1007 | if (strcmp(xprt->xpt_class->xcl_name, xcl_name)) | ||
1008 | continue; | ||
1009 | if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) /* AF_UNSPEC matches any family */ | ||
1010 | continue; | ||
1011 | if (port && port != svc_xprt_local_port(xprt)) /* port 0 matches any port */ | ||
1012 | continue; | ||
1013 | found = xprt; | ||
1014 | svc_xprt_get(xprt); /* taken while sv_lock is still held */ | ||
1015 | break; | ||
1016 | } | ||
1017 | spin_unlock_bh(&serv->sv_lock); | ||
1018 | return found; | ||
1019 | } | ||
1020 | EXPORT_SYMBOL_GPL(svc_find_xprt); | ||
1021 | |||
1022 | /* | ||
1023 | * Format a buffer with a list of the active transports (one "<class name> <local port>" line per entry on sv_permsocks). A zero for | ||
1024 | * the buflen parameter disables target buffer overflow checking. Returns the number of bytes written, not counting the terminating NUL; buf is left untouched when nothing is written. | ||
1025 | */ | ||
1026 | int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) | ||
1027 | { | ||
1028 | struct svc_xprt *xprt; | ||
1029 | char xprt_str[64]; | ||
1030 | int totlen = 0; | ||
1031 | int len; | ||
1032 | |||
1033 | /* Sanity check args */ | ||
1034 | if (!serv) | ||
1035 | return 0; | ||
1036 | |||
1037 | spin_lock_bh(&serv->sv_lock); | ||
1038 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { | ||
1039 | len = snprintf(xprt_str, sizeof(xprt_str), | ||
1040 | "%s %d\n", xprt->xpt_class->xcl_name, | ||
1041 | svc_xprt_local_port(xprt)); | ||
1042 | /* If the string was truncated, replace with error string */ | ||
1043 | if (len >= sizeof(xprt_str)) | ||
1044 | strcpy(xprt_str, "name-too-long\n"); | ||
1045 | /* Don't overflow buffer; len is recomputed because xprt_str may have been replaced above */ | ||
1046 | len = strlen(xprt_str); | ||
1047 | if (buflen && (len + totlen >= buflen)) /* >= keeps room for the NUL that strcpy() appends */ | ||
1048 | break; | ||
1049 | strcpy(buf+totlen, xprt_str); | ||
1050 | totlen += len; | ||
1051 | } | ||
1052 | spin_unlock_bh(&serv->sv_lock); | ||
1053 | return totlen; | ||
1054 | } | ||
1055 | EXPORT_SYMBOL_GPL(svc_xprt_names); | ||
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index af7c5f05c6e1..8a73cbb16052 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c | |||
@@ -57,11 +57,13 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) | |||
57 | rqstp->rq_authop = aops; | 57 | rqstp->rq_authop = aops; |
58 | return aops->accept(rqstp, authp); | 58 | return aops->accept(rqstp, authp); |
59 | } | 59 | } |
60 | EXPORT_SYMBOL(svc_authenticate); | ||
60 | 61 | ||
61 | int svc_set_client(struct svc_rqst *rqstp) | 62 | int svc_set_client(struct svc_rqst *rqstp) |
62 | { | 63 | { |
63 | return rqstp->rq_authop->set_client(rqstp); | 64 | return rqstp->rq_authop->set_client(rqstp); |
64 | } | 65 | } |
66 | EXPORT_SYMBOL(svc_set_client); | ||
65 | 67 | ||
66 | /* A request, which was authenticated, has now executed. | 68 | /* A request, which was authenticated, has now executed. |
67 | * Time to finalise the credentials and verifier | 69 | * Time to finalise the credentials and verifier |
@@ -93,6 +95,7 @@ svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops) | |||
93 | spin_unlock(&authtab_lock); | 95 | spin_unlock(&authtab_lock); |
94 | return rv; | 96 | return rv; |
95 | } | 97 | } |
98 | EXPORT_SYMBOL(svc_auth_register); | ||
96 | 99 | ||
97 | void | 100 | void |
98 | svc_auth_unregister(rpc_authflavor_t flavor) | 101 | svc_auth_unregister(rpc_authflavor_t flavor) |
@@ -129,6 +132,7 @@ void auth_domain_put(struct auth_domain *dom) | |||
129 | spin_unlock(&auth_domain_lock); | 132 | spin_unlock(&auth_domain_lock); |
130 | } | 133 | } |
131 | } | 134 | } |
135 | EXPORT_SYMBOL(auth_domain_put); | ||
132 | 136 | ||
133 | struct auth_domain * | 137 | struct auth_domain * |
134 | auth_domain_lookup(char *name, struct auth_domain *new) | 138 | auth_domain_lookup(char *name, struct auth_domain *new) |
@@ -153,8 +157,10 @@ auth_domain_lookup(char *name, struct auth_domain *new) | |||
153 | spin_unlock(&auth_domain_lock); | 157 | spin_unlock(&auth_domain_lock); |
154 | return new; | 158 | return new; |
155 | } | 159 | } |
160 | EXPORT_SYMBOL(auth_domain_lookup); | ||
156 | 161 | ||
157 | struct auth_domain *auth_domain_find(char *name) | 162 | struct auth_domain *auth_domain_find(char *name) |
158 | { | 163 | { |
159 | return auth_domain_lookup(name, NULL); | 164 | return auth_domain_lookup(name, NULL); |
160 | } | 165 | } |
166 | EXPORT_SYMBOL(auth_domain_find); | ||
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 411479411b21..3c64051e4555 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c | |||
@@ -63,6 +63,7 @@ struct auth_domain *unix_domain_find(char *name) | |||
63 | rv = auth_domain_lookup(name, &new->h); | 63 | rv = auth_domain_lookup(name, &new->h); |
64 | } | 64 | } |
65 | } | 65 | } |
66 | EXPORT_SYMBOL(unix_domain_find); | ||
66 | 67 | ||
67 | static void svcauth_unix_domain_release(struct auth_domain *dom) | 68 | static void svcauth_unix_domain_release(struct auth_domain *dom) |
68 | { | 69 | { |
@@ -340,6 +341,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) | |||
340 | else | 341 | else |
341 | return -ENOMEM; | 342 | return -ENOMEM; |
342 | } | 343 | } |
344 | EXPORT_SYMBOL(auth_unix_add_addr); | ||
343 | 345 | ||
344 | int auth_unix_forget_old(struct auth_domain *dom) | 346 | int auth_unix_forget_old(struct auth_domain *dom) |
345 | { | 347 | { |
@@ -351,6 +353,7 @@ int auth_unix_forget_old(struct auth_domain *dom) | |||
351 | udom->addr_changes++; | 353 | udom->addr_changes++; |
352 | return 0; | 354 | return 0; |
353 | } | 355 | } |
356 | EXPORT_SYMBOL(auth_unix_forget_old); | ||
354 | 357 | ||
355 | struct auth_domain *auth_unix_lookup(struct in_addr addr) | 358 | struct auth_domain *auth_unix_lookup(struct in_addr addr) |
356 | { | 359 | { |
@@ -375,50 +378,56 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) | |||
375 | cache_put(&ipm->h, &ip_map_cache); | 378 | cache_put(&ipm->h, &ip_map_cache); |
376 | return rv; | 379 | return rv; |
377 | } | 380 | } |
381 | EXPORT_SYMBOL(auth_unix_lookup); | ||
378 | 382 | ||
379 | void svcauth_unix_purge(void) | 383 | void svcauth_unix_purge(void) |
380 | { | 384 | { |
381 | cache_purge(&ip_map_cache); | 385 | cache_purge(&ip_map_cache); |
382 | } | 386 | } |
387 | EXPORT_SYMBOL(svcauth_unix_purge); | ||
383 | 388 | ||
384 | static inline struct ip_map * | 389 | static inline struct ip_map * |
385 | ip_map_cached_get(struct svc_rqst *rqstp) | 390 | ip_map_cached_get(struct svc_rqst *rqstp) |
386 | { | 391 | { |
387 | struct ip_map *ipm; | 392 | struct ip_map *ipm = NULL; |
388 | struct svc_sock *svsk = rqstp->rq_sock; | 393 | struct svc_xprt *xprt = rqstp->rq_xprt; |
389 | spin_lock(&svsk->sk_lock); | 394 | |
390 | ipm = svsk->sk_info_authunix; | 395 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { |
391 | if (ipm != NULL) { | 396 | spin_lock(&xprt->xpt_lock); |
392 | if (!cache_valid(&ipm->h)) { | 397 | ipm = xprt->xpt_auth_cache; |
393 | /* | 398 | if (ipm != NULL) { |
394 | * The entry has been invalidated since it was | 399 | if (!cache_valid(&ipm->h)) { |
395 | * remembered, e.g. by a second mount from the | 400 | /* |
396 | * same IP address. | 401 | * The entry has been invalidated since it was |
397 | */ | 402 | * remembered, e.g. by a second mount from the |
398 | svsk->sk_info_authunix = NULL; | 403 | * same IP address. |
399 | spin_unlock(&svsk->sk_lock); | 404 | */ |
400 | cache_put(&ipm->h, &ip_map_cache); | 405 | xprt->xpt_auth_cache = NULL; |
401 | return NULL; | 406 | spin_unlock(&xprt->xpt_lock); |
407 | cache_put(&ipm->h, &ip_map_cache); | ||
408 | return NULL; | ||
409 | } | ||
410 | cache_get(&ipm->h); | ||
402 | } | 411 | } |
403 | cache_get(&ipm->h); | 412 | spin_unlock(&xprt->xpt_lock); |
404 | } | 413 | } |
405 | spin_unlock(&svsk->sk_lock); | ||
406 | return ipm; | 414 | return ipm; |
407 | } | 415 | } |
408 | 416 | ||
409 | static inline void | 417 | static inline void |
410 | ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) | 418 | ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) |
411 | { | 419 | { |
412 | struct svc_sock *svsk = rqstp->rq_sock; | 420 | struct svc_xprt *xprt = rqstp->rq_xprt; |
413 | 421 | ||
414 | spin_lock(&svsk->sk_lock); | 422 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { |
415 | if (svsk->sk_sock->type == SOCK_STREAM && | 423 | spin_lock(&xprt->xpt_lock); |
416 | svsk->sk_info_authunix == NULL) { | 424 | if (xprt->xpt_auth_cache == NULL) { |
417 | /* newly cached, keep the reference */ | 425 | /* newly cached, keep the reference */ |
418 | svsk->sk_info_authunix = ipm; | 426 | xprt->xpt_auth_cache = ipm; |
419 | ipm = NULL; | 427 | ipm = NULL; |
428 | } | ||
429 | spin_unlock(&xprt->xpt_lock); | ||
420 | } | 430 | } |
421 | spin_unlock(&svsk->sk_lock); | ||
422 | if (ipm) | 431 | if (ipm) |
423 | cache_put(&ipm->h, &ip_map_cache); | 432 | cache_put(&ipm->h, &ip_map_cache); |
424 | } | 433 | } |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c75bffeb89eb..1d3e5fcc2cc4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * The server scheduling algorithm does not always distribute the load | 6 | * The server scheduling algorithm does not always distribute the load |
7 | * evenly when servicing a single client. May need to modify the | 7 | * evenly when servicing a single client. May need to modify the |
8 | * svc_sock_enqueue procedure... | 8 | * svc_xprt_enqueue procedure... |
9 | * | 9 | * |
10 | * TCP support is largely untested and may be a little slow. The problem | 10 | * TCP support is largely untested and may be a little slow. The problem |
11 | * is that we currently do two separate recvfrom's, one for the 4-byte | 11 | * is that we currently do two separate recvfrom's, one for the 4-byte |
@@ -48,72 +48,40 @@ | |||
48 | #include <linux/sunrpc/svcsock.h> | 48 | #include <linux/sunrpc/svcsock.h> |
49 | #include <linux/sunrpc/stats.h> | 49 | #include <linux/sunrpc/stats.h> |
50 | 50 | ||
51 | /* SMP locking strategy: | 51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT |
52 | * | ||
53 | * svc_pool->sp_lock protects most of the fields of that pool. | ||
54 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
55 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
56 | * BKL protects svc_serv->sv_nrthread. | ||
57 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | ||
58 | * and the ->sk_info_authunix cache. | ||
59 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. | ||
60 | * | ||
61 | * Some flags can be set to certain values at any time | ||
62 | * providing that certain rules are followed: | ||
63 | * | ||
64 | * SK_CONN, SK_DATA, can be set or cleared at any time. | ||
65 | * after a set, svc_sock_enqueue must be called. | ||
66 | * after a clear, the socket must be read/accepted | ||
67 | * if this succeeds, it must be set again. | ||
68 | * SK_CLOSE can set at any time. It is never cleared. | ||
69 | * sk_inuse contains a bias of '1' until SK_DEAD is set. | ||
70 | * so when sk_inuse hits zero, we know the socket is dead | ||
71 | * and no-one is using it. | ||
72 | * SK_DEAD can only be set while SK_BUSY is held which ensures | ||
73 | * no other thread will be using the socket or will try to | ||
74 | * set SK_DEAD. | ||
75 | * | ||
76 | */ | ||
77 | |||
78 | #define RPCDBG_FACILITY RPCDBG_SVCSOCK | ||
79 | 52 | ||
80 | 53 | ||
81 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, | 54 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, |
82 | int *errp, int flags); | 55 | int *errp, int flags); |
83 | static void svc_delete_socket(struct svc_sock *svsk); | ||
84 | static void svc_udp_data_ready(struct sock *, int); | 56 | static void svc_udp_data_ready(struct sock *, int); |
85 | static int svc_udp_recvfrom(struct svc_rqst *); | 57 | static int svc_udp_recvfrom(struct svc_rqst *); |
86 | static int svc_udp_sendto(struct svc_rqst *); | 58 | static int svc_udp_sendto(struct svc_rqst *); |
87 | static void svc_close_socket(struct svc_sock *svsk); | 59 | static void svc_sock_detach(struct svc_xprt *); |
88 | 60 | static void svc_sock_free(struct svc_xprt *); | |
89 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); | ||
90 | static int svc_deferred_recv(struct svc_rqst *rqstp); | ||
91 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | ||
92 | |||
93 | /* apparently the "standard" is that clients close | ||
94 | * idle connections after 5 minutes, servers after | ||
95 | * 6 minutes | ||
96 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
97 | */ | ||
98 | static int svc_conn_age_period = 6*60; | ||
99 | 61 | ||
62 | static struct svc_xprt *svc_create_socket(struct svc_serv *, int, | ||
63 | struct sockaddr *, int, int); | ||
100 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 64 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
101 | static struct lock_class_key svc_key[2]; | 65 | static struct lock_class_key svc_key[2]; |
102 | static struct lock_class_key svc_slock_key[2]; | 66 | static struct lock_class_key svc_slock_key[2]; |
103 | 67 | ||
104 | static inline void svc_reclassify_socket(struct socket *sock) | 68 | static void svc_reclassify_socket(struct socket *sock) |
105 | { | 69 | { |
106 | struct sock *sk = sock->sk; | 70 | struct sock *sk = sock->sk; |
107 | BUG_ON(sock_owned_by_user(sk)); | 71 | BUG_ON(sock_owned_by_user(sk)); |
108 | switch (sk->sk_family) { | 72 | switch (sk->sk_family) { |
109 | case AF_INET: | 73 | case AF_INET: |
110 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", | 74 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", |
111 | &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); | 75 | &svc_slock_key[0], |
76 | "sk_xprt.xpt_lock-AF_INET-NFSD", | ||
77 | &svc_key[0]); | ||
112 | break; | 78 | break; |
113 | 79 | ||
114 | case AF_INET6: | 80 | case AF_INET6: |
115 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", | 81 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", |
116 | &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); | 82 | &svc_slock_key[1], |
83 | "sk_xprt.xpt_lock-AF_INET6-NFSD", | ||
84 | &svc_key[1]); | ||
117 | break; | 85 | break; |
118 | 86 | ||
119 | default: | 87 | default: |
@@ -121,81 +89,26 @@ static inline void svc_reclassify_socket(struct socket *sock) | |||
121 | } | 89 | } |
122 | } | 90 | } |
123 | #else | 91 | #else |
124 | static inline void svc_reclassify_socket(struct socket *sock) | 92 | static void svc_reclassify_socket(struct socket *sock) |
125 | { | 93 | { |
126 | } | 94 | } |
127 | #endif | 95 | #endif |
128 | 96 | ||
129 | static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len) | ||
130 | { | ||
131 | switch (addr->sa_family) { | ||
132 | case AF_INET: | ||
133 | snprintf(buf, len, "%u.%u.%u.%u, port=%u", | ||
134 | NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), | ||
135 | ntohs(((struct sockaddr_in *) addr)->sin_port)); | ||
136 | break; | ||
137 | |||
138 | case AF_INET6: | ||
139 | snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", | ||
140 | NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), | ||
141 | ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); | ||
142 | break; | ||
143 | |||
144 | default: | ||
145 | snprintf(buf, len, "unknown address type: %d", addr->sa_family); | ||
146 | break; | ||
147 | } | ||
148 | return buf; | ||
149 | } | ||
150 | |||
151 | /** | ||
152 | * svc_print_addr - Format rq_addr field for printing | ||
153 | * @rqstp: svc_rqst struct containing address to print | ||
154 | * @buf: target buffer for formatted address | ||
155 | * @len: length of target buffer | ||
156 | * | ||
157 | */ | ||
158 | char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) | ||
159 | { | ||
160 | return __svc_print_addr(svc_addr(rqstp), buf, len); | ||
161 | } | ||
162 | EXPORT_SYMBOL_GPL(svc_print_addr); | ||
163 | |||
164 | /* | ||
165 | * Queue up an idle server thread. Must have pool->sp_lock held. | ||
166 | * Note: this is really a stack rather than a queue, so that we only | ||
167 | * use as many different threads as we need, and the rest don't pollute | ||
168 | * the cache. | ||
169 | */ | ||
170 | static inline void | ||
171 | svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
172 | { | ||
173 | list_add(&rqstp->rq_list, &pool->sp_threads); | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | ||
178 | */ | ||
179 | static inline void | ||
180 | svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
181 | { | ||
182 | list_del(&rqstp->rq_list); | ||
183 | } | ||
184 | |||
185 | /* | 97 | /* |
186 | * Release an skbuff after use | 98 | * Release an skbuff after use |
187 | */ | 99 | */ |
188 | static inline void | 100 | static void svc_release_skb(struct svc_rqst *rqstp) |
189 | svc_release_skb(struct svc_rqst *rqstp) | ||
190 | { | 101 | { |
191 | struct sk_buff *skb = rqstp->rq_skbuff; | 102 | struct sk_buff *skb = rqstp->rq_xprt_ctxt; |
192 | struct svc_deferred_req *dr = rqstp->rq_deferred; | 103 | struct svc_deferred_req *dr = rqstp->rq_deferred; |
193 | 104 | ||
194 | if (skb) { | 105 | if (skb) { |
195 | rqstp->rq_skbuff = NULL; | 106 | struct svc_sock *svsk = |
107 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
108 | rqstp->rq_xprt_ctxt = NULL; | ||
196 | 109 | ||
197 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); | 110 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); |
198 | skb_free_datagram(rqstp->rq_sock->sk_sk, skb); | 111 | skb_free_datagram(svsk->sk_sk, skb); |
199 | } | 112 | } |
200 | if (dr) { | 113 | if (dr) { |
201 | rqstp->rq_deferred = NULL; | 114 | rqstp->rq_deferred = NULL; |
@@ -203,253 +116,6 @@ svc_release_skb(struct svc_rqst *rqstp) | |||
203 | } | 116 | } |
204 | } | 117 | } |
205 | 118 | ||
206 | /* | ||
207 | * Any space to write? | ||
208 | */ | ||
209 | static inline unsigned long | ||
210 | svc_sock_wspace(struct svc_sock *svsk) | ||
211 | { | ||
212 | int wspace; | ||
213 | |||
214 | if (svsk->sk_sock->type == SOCK_STREAM) | ||
215 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
216 | else | ||
217 | wspace = sock_wspace(svsk->sk_sk); | ||
218 | |||
219 | return wspace; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * Queue up a socket with data pending. If there are idle nfsd | ||
224 | * processes, wake 'em up. | ||
225 | * | ||
226 | */ | ||
227 | static void | ||
228 | svc_sock_enqueue(struct svc_sock *svsk) | ||
229 | { | ||
230 | struct svc_serv *serv = svsk->sk_server; | ||
231 | struct svc_pool *pool; | ||
232 | struct svc_rqst *rqstp; | ||
233 | int cpu; | ||
234 | |||
235 | if (!(svsk->sk_flags & | ||
236 | ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) )) | ||
237 | return; | ||
238 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
239 | return; | ||
240 | |||
241 | cpu = get_cpu(); | ||
242 | pool = svc_pool_for_cpu(svsk->sk_server, cpu); | ||
243 | put_cpu(); | ||
244 | |||
245 | spin_lock_bh(&pool->sp_lock); | ||
246 | |||
247 | if (!list_empty(&pool->sp_threads) && | ||
248 | !list_empty(&pool->sp_sockets)) | ||
249 | printk(KERN_ERR | ||
250 | "svc_sock_enqueue: threads and sockets both waiting??\n"); | ||
251 | |||
252 | if (test_bit(SK_DEAD, &svsk->sk_flags)) { | ||
253 | /* Don't enqueue dead sockets */ | ||
254 | dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk); | ||
255 | goto out_unlock; | ||
256 | } | ||
257 | |||
258 | /* Mark socket as busy. It will remain in this state until the | ||
259 | * server has processed all pending data and put the socket back | ||
260 | * on the idle list. We update SK_BUSY atomically because | ||
261 | * it also guards against trying to enqueue the svc_sock twice. | ||
262 | */ | ||
263 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) { | ||
264 | /* Don't enqueue socket while already enqueued */ | ||
265 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); | ||
266 | goto out_unlock; | ||
267 | } | ||
268 | BUG_ON(svsk->sk_pool != NULL); | ||
269 | svsk->sk_pool = pool; | ||
270 | |||
271 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
272 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2 | ||
273 | > svc_sock_wspace(svsk)) | ||
274 | && !test_bit(SK_CLOSE, &svsk->sk_flags) | ||
275 | && !test_bit(SK_CONN, &svsk->sk_flags)) { | ||
276 | /* Don't enqueue while not enough space for reply */ | ||
277 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", | ||
278 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg, | ||
279 | svc_sock_wspace(svsk)); | ||
280 | svsk->sk_pool = NULL; | ||
281 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
282 | goto out_unlock; | ||
283 | } | ||
284 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
285 | |||
286 | |||
287 | if (!list_empty(&pool->sp_threads)) { | ||
288 | rqstp = list_entry(pool->sp_threads.next, | ||
289 | struct svc_rqst, | ||
290 | rq_list); | ||
291 | dprintk("svc: socket %p served by daemon %p\n", | ||
292 | svsk->sk_sk, rqstp); | ||
293 | svc_thread_dequeue(pool, rqstp); | ||
294 | if (rqstp->rq_sock) | ||
295 | printk(KERN_ERR | ||
296 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", | ||
297 | rqstp, rqstp->rq_sock); | ||
298 | rqstp->rq_sock = svsk; | ||
299 | atomic_inc(&svsk->sk_inuse); | ||
300 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
301 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | ||
302 | BUG_ON(svsk->sk_pool != pool); | ||
303 | wake_up(&rqstp->rq_wait); | ||
304 | } else { | ||
305 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); | ||
306 | list_add_tail(&svsk->sk_ready, &pool->sp_sockets); | ||
307 | BUG_ON(svsk->sk_pool != pool); | ||
308 | } | ||
309 | |||
310 | out_unlock: | ||
311 | spin_unlock_bh(&pool->sp_lock); | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | * Dequeue the first socket. Must be called with the pool->sp_lock held. | ||
316 | */ | ||
317 | static inline struct svc_sock * | ||
318 | svc_sock_dequeue(struct svc_pool *pool) | ||
319 | { | ||
320 | struct svc_sock *svsk; | ||
321 | |||
322 | if (list_empty(&pool->sp_sockets)) | ||
323 | return NULL; | ||
324 | |||
325 | svsk = list_entry(pool->sp_sockets.next, | ||
326 | struct svc_sock, sk_ready); | ||
327 | list_del_init(&svsk->sk_ready); | ||
328 | |||
329 | dprintk("svc: socket %p dequeued, inuse=%d\n", | ||
330 | svsk->sk_sk, atomic_read(&svsk->sk_inuse)); | ||
331 | |||
332 | return svsk; | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Having read something from a socket, check whether it | ||
337 | * needs to be re-enqueued. | ||
338 | * Note: SK_DATA only gets cleared when a read-attempt finds | ||
339 | * no (or insufficient) data. | ||
340 | */ | ||
341 | static inline void | ||
342 | svc_sock_received(struct svc_sock *svsk) | ||
343 | { | ||
344 | svsk->sk_pool = NULL; | ||
345 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
346 | svc_sock_enqueue(svsk); | ||
347 | } | ||
348 | |||
349 | |||
350 | /** | ||
351 | * svc_reserve - change the space reserved for the reply to a request. | ||
352 | * @rqstp: The request in question | ||
353 | * @space: new max space to reserve | ||
354 | * | ||
355 | * Each request reserves some space on the output queue of the socket | ||
356 | * to make sure the reply fits. This function reduces that reserved | ||
357 | * space to be the amount of space used already, plus @space. | ||
358 | * | ||
359 | */ | ||
360 | void svc_reserve(struct svc_rqst *rqstp, int space) | ||
361 | { | ||
362 | space += rqstp->rq_res.head[0].iov_len; | ||
363 | |||
364 | if (space < rqstp->rq_reserved) { | ||
365 | struct svc_sock *svsk = rqstp->rq_sock; | ||
366 | atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved); | ||
367 | rqstp->rq_reserved = space; | ||
368 | |||
369 | svc_sock_enqueue(svsk); | ||
370 | } | ||
371 | } | ||
372 | |||
373 | /* | ||
374 | * Release a socket after use. | ||
375 | */ | ||
376 | static inline void | ||
377 | svc_sock_put(struct svc_sock *svsk) | ||
378 | { | ||
379 | if (atomic_dec_and_test(&svsk->sk_inuse)) { | ||
380 | BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); | ||
381 | |||
382 | dprintk("svc: releasing dead socket\n"); | ||
383 | if (svsk->sk_sock->file) | ||
384 | sockfd_put(svsk->sk_sock); | ||
385 | else | ||
386 | sock_release(svsk->sk_sock); | ||
387 | if (svsk->sk_info_authunix != NULL) | ||
388 | svcauth_unix_info_release(svsk->sk_info_authunix); | ||
389 | kfree(svsk); | ||
390 | } | ||
391 | } | ||
392 | |||
393 | static void | ||
394 | svc_sock_release(struct svc_rqst *rqstp) | ||
395 | { | ||
396 | struct svc_sock *svsk = rqstp->rq_sock; | ||
397 | |||
398 | svc_release_skb(rqstp); | ||
399 | |||
400 | svc_free_res_pages(rqstp); | ||
401 | rqstp->rq_res.page_len = 0; | ||
402 | rqstp->rq_res.page_base = 0; | ||
403 | |||
404 | |||
405 | /* Reset response buffer and release | ||
406 | * the reservation. | ||
407 | * But first, check that enough space was reserved | ||
408 | * for the reply, otherwise we have a bug! | ||
409 | */ | ||
410 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | ||
411 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | ||
412 | rqstp->rq_reserved, | ||
413 | rqstp->rq_res.len); | ||
414 | |||
415 | rqstp->rq_res.head[0].iov_len = 0; | ||
416 | svc_reserve(rqstp, 0); | ||
417 | rqstp->rq_sock = NULL; | ||
418 | |||
419 | svc_sock_put(svsk); | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * External function to wake up a server waiting for data | ||
424 | * This really only makes sense for services like lockd | ||
425 | * which have exactly one thread anyway. | ||
426 | */ | ||
427 | void | ||
428 | svc_wake_up(struct svc_serv *serv) | ||
429 | { | ||
430 | struct svc_rqst *rqstp; | ||
431 | unsigned int i; | ||
432 | struct svc_pool *pool; | ||
433 | |||
434 | for (i = 0; i < serv->sv_nrpools; i++) { | ||
435 | pool = &serv->sv_pools[i]; | ||
436 | |||
437 | spin_lock_bh(&pool->sp_lock); | ||
438 | if (!list_empty(&pool->sp_threads)) { | ||
439 | rqstp = list_entry(pool->sp_threads.next, | ||
440 | struct svc_rqst, | ||
441 | rq_list); | ||
442 | dprintk("svc: daemon %p woken up.\n", rqstp); | ||
443 | /* | ||
444 | svc_thread_dequeue(pool, rqstp); | ||
445 | rqstp->rq_sock = NULL; | ||
446 | */ | ||
447 | wake_up(&rqstp->rq_wait); | ||
448 | } | ||
449 | spin_unlock_bh(&pool->sp_lock); | ||
450 | } | ||
451 | } | ||
452 | |||
453 | union svc_pktinfo_u { | 119 | union svc_pktinfo_u { |
454 | struct in_pktinfo pkti; | 120 | struct in_pktinfo pkti; |
455 | struct in6_pktinfo pkti6; | 121 | struct in6_pktinfo pkti6; |
@@ -459,7 +125,9 @@ union svc_pktinfo_u { | |||
459 | 125 | ||
460 | static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) | 126 | static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) |
461 | { | 127 | { |
462 | switch (rqstp->rq_sock->sk_sk->sk_family) { | 128 | struct svc_sock *svsk = |
129 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
130 | switch (svsk->sk_sk->sk_family) { | ||
463 | case AF_INET: { | 131 | case AF_INET: { |
464 | struct in_pktinfo *pki = CMSG_DATA(cmh); | 132 | struct in_pktinfo *pki = CMSG_DATA(cmh); |
465 | 133 | ||
@@ -489,10 +157,10 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) | |||
489 | /* | 157 | /* |
490 | * Generic sendto routine | 158 | * Generic sendto routine |
491 | */ | 159 | */ |
492 | static int | 160 | static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) |
493 | svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | ||
494 | { | 161 | { |
495 | struct svc_sock *svsk = rqstp->rq_sock; | 162 | struct svc_sock *svsk = |
163 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
496 | struct socket *sock = svsk->sk_sock; | 164 | struct socket *sock = svsk->sk_sock; |
497 | int slen; | 165 | int slen; |
498 | union { | 166 | union { |
@@ -565,7 +233,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | |||
565 | } | 233 | } |
566 | out: | 234 | out: |
567 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", | 235 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", |
568 | rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, | 236 | svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, |
569 | xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); | 237 | xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); |
570 | 238 | ||
571 | return len; | 239 | return len; |
@@ -602,7 +270,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
602 | if (!serv) | 270 | if (!serv) |
603 | return 0; | 271 | return 0; |
604 | spin_lock_bh(&serv->sv_lock); | 272 | spin_lock_bh(&serv->sv_lock); |
605 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { | 273 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { |
606 | int onelen = one_sock_name(buf+len, svsk); | 274 | int onelen = one_sock_name(buf+len, svsk); |
607 | if (toclose && strcmp(toclose, buf+len) == 0) | 275 | if (toclose && strcmp(toclose, buf+len) == 0) |
608 | closesk = svsk; | 276 | closesk = svsk; |
@@ -614,7 +282,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
614 | /* Should unregister with portmap, but you cannot | 282 | /* Should unregister with portmap, but you cannot |
615 | * unregister just one protocol... | 283 | * unregister just one protocol... |
616 | */ | 284 | */ |
617 | svc_close_socket(closesk); | 285 | svc_close_xprt(&closesk->sk_xprt); |
618 | else if (toclose) | 286 | else if (toclose) |
619 | return -ENOENT; | 287 | return -ENOENT; |
620 | return len; | 288 | return len; |
@@ -624,8 +292,7 @@ EXPORT_SYMBOL(svc_sock_names); | |||
624 | /* | 292 | /* |
625 | * Check input queue length | 293 | * Check input queue length |
626 | */ | 294 | */ |
627 | static int | 295 | static int svc_recv_available(struct svc_sock *svsk) |
628 | svc_recv_available(struct svc_sock *svsk) | ||
629 | { | 296 | { |
630 | struct socket *sock = svsk->sk_sock; | 297 | struct socket *sock = svsk->sk_sock; |
631 | int avail, err; | 298 | int avail, err; |
@@ -638,48 +305,31 @@ svc_recv_available(struct svc_sock *svsk) | |||
638 | /* | 305 | /* |
639 | * Generic recvfrom routine. | 306 | * Generic recvfrom routine. |
640 | */ | 307 | */ |
641 | static int | 308 | static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, |
642 | svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) | 309 | int buflen) |
643 | { | 310 | { |
644 | struct svc_sock *svsk = rqstp->rq_sock; | 311 | struct svc_sock *svsk = |
312 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
645 | struct msghdr msg = { | 313 | struct msghdr msg = { |
646 | .msg_flags = MSG_DONTWAIT, | 314 | .msg_flags = MSG_DONTWAIT, |
647 | }; | 315 | }; |
648 | struct sockaddr *sin; | ||
649 | int len; | 316 | int len; |
650 | 317 | ||
318 | rqstp->rq_xprt_hlen = 0; | ||
319 | |||
651 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, | 320 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, |
652 | msg.msg_flags); | 321 | msg.msg_flags); |
653 | 322 | ||
654 | /* sock_recvmsg doesn't fill in the name/namelen, so we must.. | ||
655 | */ | ||
656 | memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen); | ||
657 | rqstp->rq_addrlen = svsk->sk_remotelen; | ||
658 | |||
659 | /* Destination address in request is needed for binding the | ||
660 | * source address in RPC callbacks later. | ||
661 | */ | ||
662 | sin = (struct sockaddr *)&svsk->sk_local; | ||
663 | switch (sin->sa_family) { | ||
664 | case AF_INET: | ||
665 | rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; | ||
666 | break; | ||
667 | case AF_INET6: | ||
668 | rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; | ||
669 | break; | ||
670 | } | ||
671 | |||
672 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", | 323 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", |
673 | svsk, iov[0].iov_base, iov[0].iov_len, len); | 324 | svsk, iov[0].iov_base, iov[0].iov_len, len); |
674 | |||
675 | return len; | 325 | return len; |
676 | } | 326 | } |
677 | 327 | ||
678 | /* | 328 | /* |
679 | * Set socket snd and rcv buffer lengths | 329 | * Set socket snd and rcv buffer lengths |
680 | */ | 330 | */ |
681 | static inline void | 331 | static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, |
682 | svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | 332 | unsigned int rcv) |
683 | { | 333 | { |
684 | #if 0 | 334 | #if 0 |
685 | mm_segment_t oldfs; | 335 | mm_segment_t oldfs; |
@@ -704,16 +354,16 @@ svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | |||
704 | /* | 354 | /* |
705 | * INET callback when data has been received on the socket. | 355 | * INET callback when data has been received on the socket. |
706 | */ | 356 | */ |
707 | static void | 357 | static void svc_udp_data_ready(struct sock *sk, int count) |
708 | svc_udp_data_ready(struct sock *sk, int count) | ||
709 | { | 358 | { |
710 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 359 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
711 | 360 | ||
712 | if (svsk) { | 361 | if (svsk) { |
713 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", | 362 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", |
714 | svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); | 363 | svsk, sk, count, |
715 | set_bit(SK_DATA, &svsk->sk_flags); | 364 | test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); |
716 | svc_sock_enqueue(svsk); | 365 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
366 | svc_xprt_enqueue(&svsk->sk_xprt); | ||
717 | } | 367 | } |
718 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 368 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
719 | wake_up_interruptible(sk->sk_sleep); | 369 | wake_up_interruptible(sk->sk_sleep); |
@@ -722,15 +372,14 @@ svc_udp_data_ready(struct sock *sk, int count) | |||
722 | /* | 372 | /* |
723 | * INET callback when space is newly available on the socket. | 373 | * INET callback when space is newly available on the socket. |
724 | */ | 374 | */ |
725 | static void | 375 | static void svc_write_space(struct sock *sk) |
726 | svc_write_space(struct sock *sk) | ||
727 | { | 376 | { |
728 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); | 377 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); |
729 | 378 | ||
730 | if (svsk) { | 379 | if (svsk) { |
731 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", | 380 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", |
732 | svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); | 381 | svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); |
733 | svc_sock_enqueue(svsk); | 382 | svc_xprt_enqueue(&svsk->sk_xprt); |
734 | } | 383 | } |
735 | 384 | ||
736 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { | 385 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { |
@@ -740,10 +389,19 @@ svc_write_space(struct sock *sk) | |||
740 | } | 389 | } |
741 | } | 390 | } |
742 | 391 | ||
743 | static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, | 392 | /* |
744 | struct cmsghdr *cmh) | 393 | * Copy the UDP datagram's destination address to the rqstp structure. |
394 | * The 'destination' address in this case is the address to which the | ||
395 | * peer sent the datagram, i.e. our local address. For multihomed | ||
396 | * hosts, this can change from msg to msg. Note that only the IP | ||
397 | * address changes, the port number should remain the same. | ||
398 | */ | ||
399 | static void svc_udp_get_dest_address(struct svc_rqst *rqstp, | ||
400 | struct cmsghdr *cmh) | ||
745 | { | 401 | { |
746 | switch (rqstp->rq_sock->sk_sk->sk_family) { | 402 | struct svc_sock *svsk = |
403 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
404 | switch (svsk->sk_sk->sk_family) { | ||
747 | case AF_INET: { | 405 | case AF_INET: { |
748 | struct in_pktinfo *pki = CMSG_DATA(cmh); | 406 | struct in_pktinfo *pki = CMSG_DATA(cmh); |
749 | rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; | 407 | rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; |
@@ -760,11 +418,11 @@ static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, | |||
760 | /* | 418 | /* |
761 | * Receive a datagram from a UDP socket. | 419 | * Receive a datagram from a UDP socket. |
762 | */ | 420 | */ |
763 | static int | 421 | static int svc_udp_recvfrom(struct svc_rqst *rqstp) |
764 | svc_udp_recvfrom(struct svc_rqst *rqstp) | ||
765 | { | 422 | { |
766 | struct svc_sock *svsk = rqstp->rq_sock; | 423 | struct svc_sock *svsk = |
767 | struct svc_serv *serv = svsk->sk_server; | 424 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); |
425 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
768 | struct sk_buff *skb; | 426 | struct sk_buff *skb; |
769 | union { | 427 | union { |
770 | struct cmsghdr hdr; | 428 | struct cmsghdr hdr; |
@@ -779,7 +437,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
779 | .msg_flags = MSG_DONTWAIT, | 437 | .msg_flags = MSG_DONTWAIT, |
780 | }; | 438 | }; |
781 | 439 | ||
782 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | 440 | if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) |
783 | /* udp sockets need large rcvbuf as all pending | 441 | /* udp sockets need large rcvbuf as all pending |
784 | * requests are still in that buffer. sndbuf must | 442 | * requests are still in that buffer. sndbuf must |
785 | * also be large enough that there is enough space | 443 | * also be large enough that there is enough space |
@@ -792,17 +450,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
792 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, | 450 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, |
793 | (serv->sv_nrthreads+3) * serv->sv_max_mesg); | 451 | (serv->sv_nrthreads+3) * serv->sv_max_mesg); |
794 | 452 | ||
795 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 453 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
796 | svc_sock_received(svsk); | ||
797 | return svc_deferred_recv(rqstp); | ||
798 | } | ||
799 | |||
800 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
801 | svc_delete_socket(svsk); | ||
802 | return 0; | ||
803 | } | ||
804 | |||
805 | clear_bit(SK_DATA, &svsk->sk_flags); | ||
806 | skb = NULL; | 454 | skb = NULL; |
807 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, | 455 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, |
808 | 0, 0, MSG_PEEK | MSG_DONTWAIT); | 456 | 0, 0, MSG_PEEK | MSG_DONTWAIT); |
@@ -813,24 +461,27 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
813 | if (err != -EAGAIN) { | 461 | if (err != -EAGAIN) { |
814 | /* possibly an icmp error */ | 462 | /* possibly an icmp error */ |
815 | dprintk("svc: recvfrom returned error %d\n", -err); | 463 | dprintk("svc: recvfrom returned error %d\n", -err); |
816 | set_bit(SK_DATA, &svsk->sk_flags); | 464 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
817 | } | 465 | } |
818 | svc_sock_received(svsk); | 466 | svc_xprt_received(&svsk->sk_xprt); |
819 | return -EAGAIN; | 467 | return -EAGAIN; |
820 | } | 468 | } |
821 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); | 469 | len = svc_addr_len(svc_addr(rqstp)); |
470 | if (len < 0) | ||
471 | return len; | ||
472 | rqstp->rq_addrlen = len; | ||
822 | if (skb->tstamp.tv64 == 0) { | 473 | if (skb->tstamp.tv64 == 0) { |
823 | skb->tstamp = ktime_get_real(); | 474 | skb->tstamp = ktime_get_real(); |
824 | /* Don't enable netstamp, sunrpc doesn't | 475 | /* Don't enable netstamp, sunrpc doesn't |
825 | need that much accuracy */ | 476 | need that much accuracy */ |
826 | } | 477 | } |
827 | svsk->sk_sk->sk_stamp = skb->tstamp; | 478 | svsk->sk_sk->sk_stamp = skb->tstamp; |
828 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ | 479 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ |
829 | 480 | ||
830 | /* | 481 | /* |
831 | * Maybe more packets - kick another thread ASAP. | 482 | * Maybe more packets - kick another thread ASAP. |
832 | */ | 483 | */ |
833 | svc_sock_received(svsk); | 484 | svc_xprt_received(&svsk->sk_xprt); |
834 | 485 | ||
835 | len = skb->len - sizeof(struct udphdr); | 486 | len = skb->len - sizeof(struct udphdr); |
836 | rqstp->rq_arg.len = len; | 487 | rqstp->rq_arg.len = len; |
@@ -861,13 +512,14 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
861 | skb_free_datagram(svsk->sk_sk, skb); | 512 | skb_free_datagram(svsk->sk_sk, skb); |
862 | } else { | 513 | } else { |
863 | /* we can use it in-place */ | 514 | /* we can use it in-place */ |
864 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); | 515 | rqstp->rq_arg.head[0].iov_base = skb->data + |
516 | sizeof(struct udphdr); | ||
865 | rqstp->rq_arg.head[0].iov_len = len; | 517 | rqstp->rq_arg.head[0].iov_len = len; |
866 | if (skb_checksum_complete(skb)) { | 518 | if (skb_checksum_complete(skb)) { |
867 | skb_free_datagram(svsk->sk_sk, skb); | 519 | skb_free_datagram(svsk->sk_sk, skb); |
868 | return 0; | 520 | return 0; |
869 | } | 521 | } |
870 | rqstp->rq_skbuff = skb; | 522 | rqstp->rq_xprt_ctxt = skb; |
871 | } | 523 | } |
872 | 524 | ||
873 | rqstp->rq_arg.page_base = 0; | 525 | rqstp->rq_arg.page_base = 0; |
@@ -900,27 +552,81 @@ svc_udp_sendto(struct svc_rqst *rqstp) | |||
900 | return error; | 552 | return error; |
901 | } | 553 | } |
902 | 554 | ||
903 | static void | 555 | static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp) |
904 | svc_udp_init(struct svc_sock *svsk) | 556 | { |
557 | } | ||
558 | |||
559 | static int svc_udp_has_wspace(struct svc_xprt *xprt) | ||
560 | { | ||
561 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
562 | struct svc_serv *serv = xprt->xpt_server; | ||
563 | unsigned long required; | ||
564 | |||
565 | /* | ||
566 | * Set the SOCK_NOSPACE flag before checking the available | ||
567 | * sock space. | ||
568 | */ | ||
569 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
570 | required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; | ||
571 | if (required*2 > sock_wspace(svsk->sk_sk)) | ||
572 | return 0; | ||
573 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
574 | return 1; | ||
575 | } | ||
576 | |||
577 | static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) | ||
578 | { | ||
579 | BUG(); | ||
580 | return NULL; | ||
581 | } | ||
582 | |||
583 | static struct svc_xprt *svc_udp_create(struct svc_serv *serv, | ||
584 | struct sockaddr *sa, int salen, | ||
585 | int flags) | ||
586 | { | ||
587 | return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); | ||
588 | } | ||
589 | |||
590 | static struct svc_xprt_ops svc_udp_ops = { | ||
591 | .xpo_create = svc_udp_create, | ||
592 | .xpo_recvfrom = svc_udp_recvfrom, | ||
593 | .xpo_sendto = svc_udp_sendto, | ||
594 | .xpo_release_rqst = svc_release_skb, | ||
595 | .xpo_detach = svc_sock_detach, | ||
596 | .xpo_free = svc_sock_free, | ||
597 | .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, | ||
598 | .xpo_has_wspace = svc_udp_has_wspace, | ||
599 | .xpo_accept = svc_udp_accept, | ||
600 | }; | ||
601 | |||
602 | static struct svc_xprt_class svc_udp_class = { | ||
603 | .xcl_name = "udp", | ||
604 | .xcl_owner = THIS_MODULE, | ||
605 | .xcl_ops = &svc_udp_ops, | ||
606 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, | ||
607 | }; | ||
608 | |||
609 | static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) | ||
905 | { | 610 | { |
906 | int one = 1; | 611 | int one = 1; |
907 | mm_segment_t oldfs; | 612 | mm_segment_t oldfs; |
908 | 613 | ||
614 | svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); | ||
615 | clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); | ||
909 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; | 616 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; |
910 | svsk->sk_sk->sk_write_space = svc_write_space; | 617 | svsk->sk_sk->sk_write_space = svc_write_space; |
911 | svsk->sk_recvfrom = svc_udp_recvfrom; | ||
912 | svsk->sk_sendto = svc_udp_sendto; | ||
913 | 618 | ||
914 | /* initialise setting must have enough space to | 619 | /* initialise setting must have enough space to |
915 | * receive and respond to one request. | 620 | * receive and respond to one request. |
916 | * svc_udp_recvfrom will re-adjust if necessary | 621 | * svc_udp_recvfrom will re-adjust if necessary |
917 | */ | 622 | */ |
918 | svc_sock_setbufsize(svsk->sk_sock, | 623 | svc_sock_setbufsize(svsk->sk_sock, |
919 | 3 * svsk->sk_server->sv_max_mesg, | 624 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, |
920 | 3 * svsk->sk_server->sv_max_mesg); | 625 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); |
921 | 626 | ||
922 | set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ | 627 | /* data might have come in before data_ready set up */ |
923 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 628 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
629 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); | ||
924 | 630 | ||
925 | oldfs = get_fs(); | 631 | oldfs = get_fs(); |
926 | set_fs(KERNEL_DS); | 632 | set_fs(KERNEL_DS); |
@@ -934,8 +640,7 @@ svc_udp_init(struct svc_sock *svsk) | |||
934 | * A data_ready event on a listening socket means there's a connection | 640 | * A data_ready event on a listening socket means there's a connection |
935 | * pending. Do not use state_change as a substitute for it. | 641 | * pending. Do not use state_change as a substitute for it. |
936 | */ | 642 | */ |
937 | static void | 643 | static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) |
938 | svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | ||
939 | { | 644 | { |
940 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 645 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
941 | 646 | ||
@@ -954,8 +659,8 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | |||
954 | */ | 659 | */ |
955 | if (sk->sk_state == TCP_LISTEN) { | 660 | if (sk->sk_state == TCP_LISTEN) { |
956 | if (svsk) { | 661 | if (svsk) { |
957 | set_bit(SK_CONN, &svsk->sk_flags); | 662 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
958 | svc_sock_enqueue(svsk); | 663 | svc_xprt_enqueue(&svsk->sk_xprt); |
959 | } else | 664 | } else |
960 | printk("svc: socket %p: no user data\n", sk); | 665 | printk("svc: socket %p: no user data\n", sk); |
961 | } | 666 | } |
@@ -967,8 +672,7 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | |||
967 | /* | 672 | /* |
968 | * A state change on a connected socket means it's dying or dead. | 673 | * A state change on a connected socket means it's dying or dead. |
969 | */ | 674 | */ |
970 | static void | 675 | static void svc_tcp_state_change(struct sock *sk) |
971 | svc_tcp_state_change(struct sock *sk) | ||
972 | { | 676 | { |
973 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 677 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
974 | 678 | ||
@@ -978,51 +682,36 @@ svc_tcp_state_change(struct sock *sk) | |||
978 | if (!svsk) | 682 | if (!svsk) |
979 | printk("svc: socket %p: no user data\n", sk); | 683 | printk("svc: socket %p: no user data\n", sk); |
980 | else { | 684 | else { |
981 | set_bit(SK_CLOSE, &svsk->sk_flags); | 685 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
982 | svc_sock_enqueue(svsk); | 686 | svc_xprt_enqueue(&svsk->sk_xprt); |
983 | } | 687 | } |
984 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 688 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
985 | wake_up_interruptible_all(sk->sk_sleep); | 689 | wake_up_interruptible_all(sk->sk_sleep); |
986 | } | 690 | } |
987 | 691 | ||
988 | static void | 692 | static void svc_tcp_data_ready(struct sock *sk, int count) |
989 | svc_tcp_data_ready(struct sock *sk, int count) | ||
990 | { | 693 | { |
991 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 694 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
992 | 695 | ||
993 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", | 696 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", |
994 | sk, sk->sk_user_data); | 697 | sk, sk->sk_user_data); |
995 | if (svsk) { | 698 | if (svsk) { |
996 | set_bit(SK_DATA, &svsk->sk_flags); | 699 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
997 | svc_sock_enqueue(svsk); | 700 | svc_xprt_enqueue(&svsk->sk_xprt); |
998 | } | 701 | } |
999 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 702 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
1000 | wake_up_interruptible(sk->sk_sleep); | 703 | wake_up_interruptible(sk->sk_sleep); |
1001 | } | 704 | } |
1002 | 705 | ||
1003 | static inline int svc_port_is_privileged(struct sockaddr *sin) | ||
1004 | { | ||
1005 | switch (sin->sa_family) { | ||
1006 | case AF_INET: | ||
1007 | return ntohs(((struct sockaddr_in *)sin)->sin_port) | ||
1008 | < PROT_SOCK; | ||
1009 | case AF_INET6: | ||
1010 | return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) | ||
1011 | < PROT_SOCK; | ||
1012 | default: | ||
1013 | return 0; | ||
1014 | } | ||
1015 | } | ||
1016 | |||
1017 | /* | 706 | /* |
1018 | * Accept a TCP connection | 707 | * Accept a TCP connection |
1019 | */ | 708 | */ |
1020 | static void | 709 | static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) |
1021 | svc_tcp_accept(struct svc_sock *svsk) | ||
1022 | { | 710 | { |
711 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
1023 | struct sockaddr_storage addr; | 712 | struct sockaddr_storage addr; |
1024 | struct sockaddr *sin = (struct sockaddr *) &addr; | 713 | struct sockaddr *sin = (struct sockaddr *) &addr; |
1025 | struct svc_serv *serv = svsk->sk_server; | 714 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; |
1026 | struct socket *sock = svsk->sk_sock; | 715 | struct socket *sock = svsk->sk_sock; |
1027 | struct socket *newsock; | 716 | struct socket *newsock; |
1028 | struct svc_sock *newsvsk; | 717 | struct svc_sock *newsvsk; |
@@ -1031,9 +720,9 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1031 | 720 | ||
1032 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); | 721 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); |
1033 | if (!sock) | 722 | if (!sock) |
1034 | return; | 723 | return NULL; |
1035 | 724 | ||
1036 | clear_bit(SK_CONN, &svsk->sk_flags); | 725 | clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
1037 | err = kernel_accept(sock, &newsock, O_NONBLOCK); | 726 | err = kernel_accept(sock, &newsock, O_NONBLOCK); |
1038 | if (err < 0) { | 727 | if (err < 0) { |
1039 | if (err == -ENOMEM) | 728 | if (err == -ENOMEM) |
@@ -1042,11 +731,9 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1042 | else if (err != -EAGAIN && net_ratelimit()) | 731 | else if (err != -EAGAIN && net_ratelimit()) |
1043 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", | 732 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", |
1044 | serv->sv_name, -err); | 733 | serv->sv_name, -err); |
1045 | return; | 734 | return NULL; |
1046 | } | 735 | } |
1047 | 736 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); | |
1048 | set_bit(SK_CONN, &svsk->sk_flags); | ||
1049 | svc_sock_enqueue(svsk); | ||
1050 | 737 | ||
1051 | err = kernel_getpeername(newsock, sin, &slen); | 738 | err = kernel_getpeername(newsock, sin, &slen); |
1052 | if (err < 0) { | 739 | if (err < 0) { |
@@ -1077,106 +764,42 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
1077 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, | 764 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, |
1078 | (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) | 765 | (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) |
1079 | goto failed; | 766 | goto failed; |
1080 | memcpy(&newsvsk->sk_remote, sin, slen); | 767 | svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); |
1081 | newsvsk->sk_remotelen = slen; | ||
1082 | err = kernel_getsockname(newsock, sin, &slen); | 768 | err = kernel_getsockname(newsock, sin, &slen); |
1083 | if (unlikely(err < 0)) { | 769 | if (unlikely(err < 0)) { |
1084 | dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); | 770 | dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); |
1085 | slen = offsetof(struct sockaddr, sa_data); | 771 | slen = offsetof(struct sockaddr, sa_data); |
1086 | } | 772 | } |
1087 | memcpy(&newsvsk->sk_local, sin, slen); | 773 | svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); |
1088 | |||
1089 | svc_sock_received(newsvsk); | ||
1090 | |||
1091 | /* make sure that we don't have too many active connections. | ||
1092 | * If we have, something must be dropped. | ||
1093 | * | ||
1094 | * There's no point in trying to do random drop here for | ||
1095 | * DoS prevention. The NFS clients does 1 reconnect in 15 | ||
1096 | * seconds. An attacker can easily beat that. | ||
1097 | * | ||
1098 | * The only somewhat efficient mechanism would be if drop | ||
1099 | * old connections from the same IP first. But right now | ||
1100 | * we don't even record the client IP in svc_sock. | ||
1101 | */ | ||
1102 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | ||
1103 | struct svc_sock *svsk = NULL; | ||
1104 | spin_lock_bh(&serv->sv_lock); | ||
1105 | if (!list_empty(&serv->sv_tempsocks)) { | ||
1106 | if (net_ratelimit()) { | ||
1107 | /* Try to help the admin */ | ||
1108 | printk(KERN_NOTICE "%s: too many open TCP " | ||
1109 | "sockets, consider increasing the " | ||
1110 | "number of nfsd threads\n", | ||
1111 | serv->sv_name); | ||
1112 | printk(KERN_NOTICE | ||
1113 | "%s: last TCP connect from %s\n", | ||
1114 | serv->sv_name, __svc_print_addr(sin, | ||
1115 | buf, sizeof(buf))); | ||
1116 | } | ||
1117 | /* | ||
1118 | * Always select the oldest socket. It's not fair, | ||
1119 | * but so is life | ||
1120 | */ | ||
1121 | svsk = list_entry(serv->sv_tempsocks.prev, | ||
1122 | struct svc_sock, | ||
1123 | sk_list); | ||
1124 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1125 | atomic_inc(&svsk->sk_inuse); | ||
1126 | } | ||
1127 | spin_unlock_bh(&serv->sv_lock); | ||
1128 | |||
1129 | if (svsk) { | ||
1130 | svc_sock_enqueue(svsk); | ||
1131 | svc_sock_put(svsk); | ||
1132 | } | ||
1133 | |||
1134 | } | ||
1135 | 774 | ||
1136 | if (serv->sv_stats) | 775 | if (serv->sv_stats) |
1137 | serv->sv_stats->nettcpconn++; | 776 | serv->sv_stats->nettcpconn++; |
1138 | 777 | ||
1139 | return; | 778 | return &newsvsk->sk_xprt; |
1140 | 779 | ||
1141 | failed: | 780 | failed: |
1142 | sock_release(newsock); | 781 | sock_release(newsock); |
1143 | return; | 782 | return NULL; |
1144 | } | 783 | } |
1145 | 784 | ||
1146 | /* | 785 | /* |
1147 | * Receive data from a TCP socket. | 786 | * Receive data from a TCP socket. |
1148 | */ | 787 | */ |
1149 | static int | 788 | static int svc_tcp_recvfrom(struct svc_rqst *rqstp) |
1150 | svc_tcp_recvfrom(struct svc_rqst *rqstp) | ||
1151 | { | 789 | { |
1152 | struct svc_sock *svsk = rqstp->rq_sock; | 790 | struct svc_sock *svsk = |
1153 | struct svc_serv *serv = svsk->sk_server; | 791 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); |
792 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
1154 | int len; | 793 | int len; |
1155 | struct kvec *vec; | 794 | struct kvec *vec; |
1156 | int pnum, vlen; | 795 | int pnum, vlen; |
1157 | 796 | ||
1158 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", | 797 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", |
1159 | svsk, test_bit(SK_DATA, &svsk->sk_flags), | 798 | svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), |
1160 | test_bit(SK_CONN, &svsk->sk_flags), | 799 | test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), |
1161 | test_bit(SK_CLOSE, &svsk->sk_flags)); | 800 | test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); |
1162 | 801 | ||
1163 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 802 | if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) |
1164 | svc_sock_received(svsk); | ||
1165 | return svc_deferred_recv(rqstp); | ||
1166 | } | ||
1167 | |||
1168 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
1169 | svc_delete_socket(svsk); | ||
1170 | return 0; | ||
1171 | } | ||
1172 | |||
1173 | if (svsk->sk_sk->sk_state == TCP_LISTEN) { | ||
1174 | svc_tcp_accept(svsk); | ||
1175 | svc_sock_received(svsk); | ||
1176 | return 0; | ||
1177 | } | ||
1178 | |||
1179 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | ||
1180 | /* sndbuf needs to have room for one request | 803 | /* sndbuf needs to have room for one request |
1181 | * per thread, otherwise we can stall even when the | 804 | * per thread, otherwise we can stall even when the |
1182 | * network isn't a bottleneck. | 805 | * network isn't a bottleneck. |
@@ -1193,7 +816,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1193 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, | 816 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, |
1194 | 3 * serv->sv_max_mesg); | 817 | 3 * serv->sv_max_mesg); |
1195 | 818 | ||
1196 | clear_bit(SK_DATA, &svsk->sk_flags); | 819 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1197 | 820 | ||
1198 | /* Receive data. If we haven't got the record length yet, get | 821 | /* Receive data. If we haven't got the record length yet, get |
1199 | * the next four bytes. Otherwise try to gobble up as much as | 822 | * the next four bytes. Otherwise try to gobble up as much as |
@@ -1212,7 +835,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1212 | if (len < want) { | 835 | if (len < want) { |
1213 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", | 836 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", |
1214 | len, want); | 837 | len, want); |
1215 | svc_sock_received(svsk); | 838 | svc_xprt_received(&svsk->sk_xprt); |
1216 | return -EAGAIN; /* record header not complete */ | 839 | return -EAGAIN; /* record header not complete */ |
1217 | } | 840 | } |
1218 | 841 | ||
@@ -1248,11 +871,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1248 | if (len < svsk->sk_reclen) { | 871 | if (len < svsk->sk_reclen) { |
1249 | dprintk("svc: incomplete TCP record (%d of %d)\n", | 872 | dprintk("svc: incomplete TCP record (%d of %d)\n", |
1250 | len, svsk->sk_reclen); | 873 | len, svsk->sk_reclen); |
1251 | svc_sock_received(svsk); | 874 | svc_xprt_received(&svsk->sk_xprt); |
1252 | return -EAGAIN; /* record not complete */ | 875 | return -EAGAIN; /* record not complete */ |
1253 | } | 876 | } |
1254 | len = svsk->sk_reclen; | 877 | len = svsk->sk_reclen; |
1255 | set_bit(SK_DATA, &svsk->sk_flags); | 878 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1256 | 879 | ||
1257 | vec = rqstp->rq_vec; | 880 | vec = rqstp->rq_vec; |
1258 | vec[0] = rqstp->rq_arg.head[0]; | 881 | vec[0] = rqstp->rq_arg.head[0]; |
@@ -1281,30 +904,31 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1281 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; | 904 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; |
1282 | } | 905 | } |
1283 | 906 | ||
1284 | rqstp->rq_skbuff = NULL; | 907 | rqstp->rq_xprt_ctxt = NULL; |
1285 | rqstp->rq_prot = IPPROTO_TCP; | 908 | rqstp->rq_prot = IPPROTO_TCP; |
1286 | 909 | ||
1287 | /* Reset TCP read info */ | 910 | /* Reset TCP read info */ |
1288 | svsk->sk_reclen = 0; | 911 | svsk->sk_reclen = 0; |
1289 | svsk->sk_tcplen = 0; | 912 | svsk->sk_tcplen = 0; |
1290 | 913 | ||
1291 | svc_sock_received(svsk); | 914 | svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); |
915 | svc_xprt_received(&svsk->sk_xprt); | ||
1292 | if (serv->sv_stats) | 916 | if (serv->sv_stats) |
1293 | serv->sv_stats->nettcpcnt++; | 917 | serv->sv_stats->nettcpcnt++; |
1294 | 918 | ||
1295 | return len; | 919 | return len; |
1296 | 920 | ||
1297 | err_delete: | 921 | err_delete: |
1298 | svc_delete_socket(svsk); | 922 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
1299 | return -EAGAIN; | 923 | return -EAGAIN; |
1300 | 924 | ||
1301 | error: | 925 | error: |
1302 | if (len == -EAGAIN) { | 926 | if (len == -EAGAIN) { |
1303 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); | 927 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); |
1304 | svc_sock_received(svsk); | 928 | svc_xprt_received(&svsk->sk_xprt); |
1305 | } else { | 929 | } else { |
1306 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", | 930 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", |
1307 | svsk->sk_server->sv_name, -len); | 931 | svsk->sk_xprt.xpt_server->sv_name, -len); |
1308 | goto err_delete; | 932 | goto err_delete; |
1309 | } | 933 | } |
1310 | 934 | ||
@@ -1314,8 +938,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
1314 | /* | 938 | /* |
1315 | * Send out data on TCP socket. | 939 | * Send out data on TCP socket. |
1316 | */ | 940 | */ |
1317 | static int | 941 | static int svc_tcp_sendto(struct svc_rqst *rqstp) |
1318 | svc_tcp_sendto(struct svc_rqst *rqstp) | ||
1319 | { | 942 | { |
1320 | struct xdr_buf *xbufp = &rqstp->rq_res; | 943 | struct xdr_buf *xbufp = &rqstp->rq_res; |
1321 | int sent; | 944 | int sent; |
@@ -1328,35 +951,109 @@ svc_tcp_sendto(struct svc_rqst *rqstp) | |||
1328 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); | 951 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); |
1329 | memcpy(xbufp->head[0].iov_base, &reclen, 4); | 952 | memcpy(xbufp->head[0].iov_base, &reclen, 4); |
1330 | 953 | ||
1331 | if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) | 954 | if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags)) |
1332 | return -ENOTCONN; | 955 | return -ENOTCONN; |
1333 | 956 | ||
1334 | sent = svc_sendto(rqstp, &rqstp->rq_res); | 957 | sent = svc_sendto(rqstp, &rqstp->rq_res); |
1335 | if (sent != xbufp->len) { | 958 | if (sent != xbufp->len) { |
1336 | printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", | 959 | printk(KERN_NOTICE |
1337 | rqstp->rq_sock->sk_server->sv_name, | 960 | "rpc-srv/tcp: %s: %s %d when sending %d bytes " |
961 | "- shutting down socket\n", | ||
962 | rqstp->rq_xprt->xpt_server->sv_name, | ||
1338 | (sent<0)?"got error":"sent only", | 963 | (sent<0)?"got error":"sent only", |
1339 | sent, xbufp->len); | 964 | sent, xbufp->len); |
1340 | set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); | 965 | set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags); |
1341 | svc_sock_enqueue(rqstp->rq_sock); | 966 | svc_xprt_enqueue(rqstp->rq_xprt); |
1342 | sent = -EAGAIN; | 967 | sent = -EAGAIN; |
1343 | } | 968 | } |
1344 | return sent; | 969 | return sent; |
1345 | } | 970 | } |
1346 | 971 | ||
1347 | static void | 972 | /* |
1348 | svc_tcp_init(struct svc_sock *svsk) | 973 | * Setup response header. TCP has a 4B record length field. |
974 | */ | ||
975 | static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) | ||
976 | { | ||
977 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
978 | |||
979 | /* tcp needs a space for the record length... */ | ||
980 | svc_putnl(resv, 0); | ||
981 | } | ||
982 | |||
983 | static int svc_tcp_has_wspace(struct svc_xprt *xprt) | ||
984 | { | ||
985 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
986 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
987 | int required; | ||
988 | int wspace; | ||
989 | |||
990 | /* | ||
991 | * Set the SOCK_NOSPACE flag before checking the available | ||
992 | * sock space. | ||
993 | */ | ||
994 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
995 | required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; | ||
996 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
997 | |||
998 | if (wspace < sk_stream_min_wspace(svsk->sk_sk)) | ||
999 | return 0; | ||
1000 | if (required * 2 > wspace) | ||
1001 | return 0; | ||
1002 | |||
1003 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
1004 | return 1; | ||
1005 | } | ||
1006 | |||
1007 | static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, | ||
1008 | struct sockaddr *sa, int salen, | ||
1009 | int flags) | ||
1010 | { | ||
1011 | return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); | ||
1012 | } | ||
1013 | |||
1014 | static struct svc_xprt_ops svc_tcp_ops = { | ||
1015 | .xpo_create = svc_tcp_create, | ||
1016 | .xpo_recvfrom = svc_tcp_recvfrom, | ||
1017 | .xpo_sendto = svc_tcp_sendto, | ||
1018 | .xpo_release_rqst = svc_release_skb, | ||
1019 | .xpo_detach = svc_sock_detach, | ||
1020 | .xpo_free = svc_sock_free, | ||
1021 | .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, | ||
1022 | .xpo_has_wspace = svc_tcp_has_wspace, | ||
1023 | .xpo_accept = svc_tcp_accept, | ||
1024 | }; | ||
1025 | |||
1026 | static struct svc_xprt_class svc_tcp_class = { | ||
1027 | .xcl_name = "tcp", | ||
1028 | .xcl_owner = THIS_MODULE, | ||
1029 | .xcl_ops = &svc_tcp_ops, | ||
1030 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, | ||
1031 | }; | ||
1032 | |||
1033 | void svc_init_xprt_sock(void) | ||
1034 | { | ||
1035 | svc_reg_xprt_class(&svc_tcp_class); | ||
1036 | svc_reg_xprt_class(&svc_udp_class); | ||
1037 | } | ||
1038 | |||
1039 | void svc_cleanup_xprt_sock(void) | ||
1040 | { | ||
1041 | svc_unreg_xprt_class(&svc_tcp_class); | ||
1042 | svc_unreg_xprt_class(&svc_udp_class); | ||
1043 | } | ||
1044 | |||
1045 | static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) | ||
1349 | { | 1046 | { |
1350 | struct sock *sk = svsk->sk_sk; | 1047 | struct sock *sk = svsk->sk_sk; |
1351 | struct tcp_sock *tp = tcp_sk(sk); | 1048 | struct tcp_sock *tp = tcp_sk(sk); |
1352 | 1049 | ||
1353 | svsk->sk_recvfrom = svc_tcp_recvfrom; | 1050 | svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); |
1354 | svsk->sk_sendto = svc_tcp_sendto; | 1051 | set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); |
1355 | |||
1356 | if (sk->sk_state == TCP_LISTEN) { | 1052 | if (sk->sk_state == TCP_LISTEN) { |
1357 | dprintk("setting up TCP socket for listening\n"); | 1053 | dprintk("setting up TCP socket for listening\n"); |
1054 | set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); | ||
1358 | sk->sk_data_ready = svc_tcp_listen_data_ready; | 1055 | sk->sk_data_ready = svc_tcp_listen_data_ready; |
1359 | set_bit(SK_CONN, &svsk->sk_flags); | 1056 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
1360 | } else { | 1057 | } else { |
1361 | dprintk("setting up TCP socket for reading\n"); | 1058 | dprintk("setting up TCP socket for reading\n"); |
1362 | sk->sk_state_change = svc_tcp_state_change; | 1059 | sk->sk_state_change = svc_tcp_state_change; |
@@ -1373,18 +1070,17 @@ svc_tcp_init(struct svc_sock *svsk) | |||
1373 | * svc_tcp_recvfrom will re-adjust if necessary | 1070 | * svc_tcp_recvfrom will re-adjust if necessary |
1374 | */ | 1071 | */ |
1375 | svc_sock_setbufsize(svsk->sk_sock, | 1072 | svc_sock_setbufsize(svsk->sk_sock, |
1376 | 3 * svsk->sk_server->sv_max_mesg, | 1073 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, |
1377 | 3 * svsk->sk_server->sv_max_mesg); | 1074 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); |
1378 | 1075 | ||
1379 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1076 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1380 | set_bit(SK_DATA, &svsk->sk_flags); | 1077 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
1381 | if (sk->sk_state != TCP_ESTABLISHED) | 1078 | if (sk->sk_state != TCP_ESTABLISHED) |
1382 | set_bit(SK_CLOSE, &svsk->sk_flags); | 1079 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
1383 | } | 1080 | } |
1384 | } | 1081 | } |
1385 | 1082 | ||
1386 | void | 1083 | void svc_sock_update_bufs(struct svc_serv *serv) |
1387 | svc_sock_update_bufs(struct svc_serv *serv) | ||
1388 | { | 1084 | { |
1389 | /* | 1085 | /* |
1390 | * The number of server threads has changed. Update | 1086 | * The number of server threads has changed. Update |
@@ -1395,232 +1091,18 @@ svc_sock_update_bufs(struct svc_serv *serv) | |||
1395 | spin_lock_bh(&serv->sv_lock); | 1091 | spin_lock_bh(&serv->sv_lock); |
1396 | list_for_each(le, &serv->sv_permsocks) { | 1092 | list_for_each(le, &serv->sv_permsocks) { |
1397 | struct svc_sock *svsk = | 1093 | struct svc_sock *svsk = |
1398 | list_entry(le, struct svc_sock, sk_list); | 1094 | list_entry(le, struct svc_sock, sk_xprt.xpt_list); |
1399 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1095 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1400 | } | 1096 | } |
1401 | list_for_each(le, &serv->sv_tempsocks) { | 1097 | list_for_each(le, &serv->sv_tempsocks) { |
1402 | struct svc_sock *svsk = | 1098 | struct svc_sock *svsk = |
1403 | list_entry(le, struct svc_sock, sk_list); | 1099 | list_entry(le, struct svc_sock, sk_xprt.xpt_list); |
1404 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1100 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
1405 | } | 1101 | } |
1406 | spin_unlock_bh(&serv->sv_lock); | 1102 | spin_unlock_bh(&serv->sv_lock); |
1407 | } | 1103 | } |
1408 | 1104 | ||
1409 | /* | 1105 | /* |
1410 | * Receive the next request on any socket. This code is carefully | ||
1411 | * organised not to touch any cachelines in the shared svc_serv | ||
1412 | * structure, only cachelines in the local svc_pool. | ||
1413 | */ | ||
1414 | int | ||
1415 | svc_recv(struct svc_rqst *rqstp, long timeout) | ||
1416 | { | ||
1417 | struct svc_sock *svsk = NULL; | ||
1418 | struct svc_serv *serv = rqstp->rq_server; | ||
1419 | struct svc_pool *pool = rqstp->rq_pool; | ||
1420 | int len, i; | ||
1421 | int pages; | ||
1422 | struct xdr_buf *arg; | ||
1423 | DECLARE_WAITQUEUE(wait, current); | ||
1424 | |||
1425 | dprintk("svc: server %p waiting for data (to = %ld)\n", | ||
1426 | rqstp, timeout); | ||
1427 | |||
1428 | if (rqstp->rq_sock) | ||
1429 | printk(KERN_ERR | ||
1430 | "svc_recv: service %p, socket not NULL!\n", | ||
1431 | rqstp); | ||
1432 | if (waitqueue_active(&rqstp->rq_wait)) | ||
1433 | printk(KERN_ERR | ||
1434 | "svc_recv: service %p, wait queue active!\n", | ||
1435 | rqstp); | ||
1436 | |||
1437 | |||
1438 | /* now allocate needed pages. If we get a failure, sleep briefly */ | ||
1439 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | ||
1440 | for (i=0; i < pages ; i++) | ||
1441 | while (rqstp->rq_pages[i] == NULL) { | ||
1442 | struct page *p = alloc_page(GFP_KERNEL); | ||
1443 | if (!p) | ||
1444 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | ||
1445 | rqstp->rq_pages[i] = p; | ||
1446 | } | ||
1447 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | ||
1448 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
1449 | |||
1450 | /* Make arg->head point to first page and arg->pages point to rest */ | ||
1451 | arg = &rqstp->rq_arg; | ||
1452 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | ||
1453 | arg->head[0].iov_len = PAGE_SIZE; | ||
1454 | arg->pages = rqstp->rq_pages + 1; | ||
1455 | arg->page_base = 0; | ||
1456 | /* save at least one page for response */ | ||
1457 | arg->page_len = (pages-2)*PAGE_SIZE; | ||
1458 | arg->len = (pages-1)*PAGE_SIZE; | ||
1459 | arg->tail[0].iov_len = 0; | ||
1460 | |||
1461 | try_to_freeze(); | ||
1462 | cond_resched(); | ||
1463 | if (signalled()) | ||
1464 | return -EINTR; | ||
1465 | |||
1466 | spin_lock_bh(&pool->sp_lock); | ||
1467 | if ((svsk = svc_sock_dequeue(pool)) != NULL) { | ||
1468 | rqstp->rq_sock = svsk; | ||
1469 | atomic_inc(&svsk->sk_inuse); | ||
1470 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
1471 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | ||
1472 | } else { | ||
1473 | /* No data pending. Go to sleep */ | ||
1474 | svc_thread_enqueue(pool, rqstp); | ||
1475 | |||
1476 | /* | ||
1477 | * We have to be able to interrupt this wait | ||
1478 | * to bring down the daemons ... | ||
1479 | */ | ||
1480 | set_current_state(TASK_INTERRUPTIBLE); | ||
1481 | add_wait_queue(&rqstp->rq_wait, &wait); | ||
1482 | spin_unlock_bh(&pool->sp_lock); | ||
1483 | |||
1484 | schedule_timeout(timeout); | ||
1485 | |||
1486 | try_to_freeze(); | ||
1487 | |||
1488 | spin_lock_bh(&pool->sp_lock); | ||
1489 | remove_wait_queue(&rqstp->rq_wait, &wait); | ||
1490 | |||
1491 | if (!(svsk = rqstp->rq_sock)) { | ||
1492 | svc_thread_dequeue(pool, rqstp); | ||
1493 | spin_unlock_bh(&pool->sp_lock); | ||
1494 | dprintk("svc: server %p, no data yet\n", rqstp); | ||
1495 | return signalled()? -EINTR : -EAGAIN; | ||
1496 | } | ||
1497 | } | ||
1498 | spin_unlock_bh(&pool->sp_lock); | ||
1499 | |||
1500 | dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", | ||
1501 | rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); | ||
1502 | len = svsk->sk_recvfrom(rqstp); | ||
1503 | dprintk("svc: got len=%d\n", len); | ||
1504 | |||
1505 | /* No data, incomplete (TCP) read, or accept() */ | ||
1506 | if (len == 0 || len == -EAGAIN) { | ||
1507 | rqstp->rq_res.len = 0; | ||
1508 | svc_sock_release(rqstp); | ||
1509 | return -EAGAIN; | ||
1510 | } | ||
1511 | svsk->sk_lastrecv = get_seconds(); | ||
1512 | clear_bit(SK_OLD, &svsk->sk_flags); | ||
1513 | |||
1514 | rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); | ||
1515 | rqstp->rq_chandle.defer = svc_defer; | ||
1516 | |||
1517 | if (serv->sv_stats) | ||
1518 | serv->sv_stats->netcnt++; | ||
1519 | return len; | ||
1520 | } | ||
1521 | |||
1522 | /* | ||
1523 | * Drop request | ||
1524 | */ | ||
1525 | void | ||
1526 | svc_drop(struct svc_rqst *rqstp) | ||
1527 | { | ||
1528 | dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); | ||
1529 | svc_sock_release(rqstp); | ||
1530 | } | ||
1531 | |||
1532 | /* | ||
1533 | * Return reply to client. | ||
1534 | */ | ||
1535 | int | ||
1536 | svc_send(struct svc_rqst *rqstp) | ||
1537 | { | ||
1538 | struct svc_sock *svsk; | ||
1539 | int len; | ||
1540 | struct xdr_buf *xb; | ||
1541 | |||
1542 | if ((svsk = rqstp->rq_sock) == NULL) { | ||
1543 | printk(KERN_WARNING "NULL socket pointer in %s:%d\n", | ||
1544 | __FILE__, __LINE__); | ||
1545 | return -EFAULT; | ||
1546 | } | ||
1547 | |||
1548 | /* release the receive skb before sending the reply */ | ||
1549 | svc_release_skb(rqstp); | ||
1550 | |||
1551 | /* calculate over-all length */ | ||
1552 | xb = & rqstp->rq_res; | ||
1553 | xb->len = xb->head[0].iov_len + | ||
1554 | xb->page_len + | ||
1555 | xb->tail[0].iov_len; | ||
1556 | |||
1557 | /* Grab svsk->sk_mutex to serialize outgoing data. */ | ||
1558 | mutex_lock(&svsk->sk_mutex); | ||
1559 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
1560 | len = -ENOTCONN; | ||
1561 | else | ||
1562 | len = svsk->sk_sendto(rqstp); | ||
1563 | mutex_unlock(&svsk->sk_mutex); | ||
1564 | svc_sock_release(rqstp); | ||
1565 | |||
1566 | if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) | ||
1567 | return 0; | ||
1568 | return len; | ||
1569 | } | ||
1570 | |||
1571 | /* | ||
1572 | * Timer function to close old temporary sockets, using | ||
1573 | * a mark-and-sweep algorithm. | ||
1574 | */ | ||
1575 | static void | ||
1576 | svc_age_temp_sockets(unsigned long closure) | ||
1577 | { | ||
1578 | struct svc_serv *serv = (struct svc_serv *)closure; | ||
1579 | struct svc_sock *svsk; | ||
1580 | struct list_head *le, *next; | ||
1581 | LIST_HEAD(to_be_aged); | ||
1582 | |||
1583 | dprintk("svc_age_temp_sockets\n"); | ||
1584 | |||
1585 | if (!spin_trylock_bh(&serv->sv_lock)) { | ||
1586 | /* busy, try again 1 sec later */ | ||
1587 | dprintk("svc_age_temp_sockets: busy\n"); | ||
1588 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | ||
1589 | return; | ||
1590 | } | ||
1591 | |||
1592 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | ||
1593 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
1594 | |||
1595 | if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) | ||
1596 | continue; | ||
1597 | if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags)) | ||
1598 | continue; | ||
1599 | atomic_inc(&svsk->sk_inuse); | ||
1600 | list_move(le, &to_be_aged); | ||
1601 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1602 | set_bit(SK_DETACHED, &svsk->sk_flags); | ||
1603 | } | ||
1604 | spin_unlock_bh(&serv->sv_lock); | ||
1605 | |||
1606 | while (!list_empty(&to_be_aged)) { | ||
1607 | le = to_be_aged.next; | ||
1608 | /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */ | ||
1609 | list_del_init(le); | ||
1610 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
1611 | |||
1612 | dprintk("queuing svsk %p for closing, %lu seconds old\n", | ||
1613 | svsk, get_seconds() - svsk->sk_lastrecv); | ||
1614 | |||
1615 | /* a thread will dequeue and close it soon */ | ||
1616 | svc_sock_enqueue(svsk); | ||
1617 | svc_sock_put(svsk); | ||
1618 | } | ||
1619 | |||
1620 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | ||
1621 | } | ||
1622 | |||
1623 | /* | ||
1624 | * Initialize socket for RPC use and create svc_sock struct | 1106 | * Initialize socket for RPC use and create svc_sock struct |
1625 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. | 1107 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. |
1626 | */ | 1108 | */ |
@@ -1631,7 +1113,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1631 | struct svc_sock *svsk; | 1113 | struct svc_sock *svsk; |
1632 | struct sock *inet; | 1114 | struct sock *inet; |
1633 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); | 1115 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); |
1634 | int is_temporary = flags & SVC_SOCK_TEMPORARY; | ||
1635 | 1116 | ||
1636 | dprintk("svc: svc_setup_socket %p\n", sock); | 1117 | dprintk("svc: svc_setup_socket %p\n", sock); |
1637 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { | 1118 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { |
@@ -1651,44 +1132,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
1651 | return NULL; | 1132 | return NULL; |
1652 | } | 1133 | } |
1653 | 1134 | ||
1654 | set_bit(SK_BUSY, &svsk->sk_flags); | ||
1655 | inet->sk_user_data = svsk; | 1135 | inet->sk_user_data = svsk; |
1656 | svsk->sk_sock = sock; | 1136 | svsk->sk_sock = sock; |
1657 | svsk->sk_sk = inet; | 1137 | svsk->sk_sk = inet; |
1658 | svsk->sk_ostate = inet->sk_state_change; | 1138 | svsk->sk_ostate = inet->sk_state_change; |
1659 | svsk->sk_odata = inet->sk_data_ready; | 1139 | svsk->sk_odata = inet->sk_data_ready; |
1660 | svsk->sk_owspace = inet->sk_write_space; | 1140 | svsk->sk_owspace = inet->sk_write_space; |
1661 | svsk->sk_server = serv; | ||
1662 | atomic_set(&svsk->sk_inuse, 1); | ||
1663 | svsk->sk_lastrecv = get_seconds(); | ||
1664 | spin_lock_init(&svsk->sk_lock); | ||
1665 | INIT_LIST_HEAD(&svsk->sk_deferred); | ||
1666 | INIT_LIST_HEAD(&svsk->sk_ready); | ||
1667 | mutex_init(&svsk->sk_mutex); | ||
1668 | 1141 | ||
1669 | /* Initialize the socket */ | 1142 | /* Initialize the socket */ |
1670 | if (sock->type == SOCK_DGRAM) | 1143 | if (sock->type == SOCK_DGRAM) |
1671 | svc_udp_init(svsk); | 1144 | svc_udp_init(svsk, serv); |
1672 | else | 1145 | else |
1673 | svc_tcp_init(svsk); | 1146 | svc_tcp_init(svsk, serv); |
1674 | |||
1675 | spin_lock_bh(&serv->sv_lock); | ||
1676 | if (is_temporary) { | ||
1677 | set_bit(SK_TEMP, &svsk->sk_flags); | ||
1678 | list_add(&svsk->sk_list, &serv->sv_tempsocks); | ||
1679 | serv->sv_tmpcnt++; | ||
1680 | if (serv->sv_temptimer.function == NULL) { | ||
1681 | /* setup timer to age temp sockets */ | ||
1682 | setup_timer(&serv->sv_temptimer, svc_age_temp_sockets, | ||
1683 | (unsigned long)serv); | ||
1684 | mod_timer(&serv->sv_temptimer, | ||
1685 | jiffies + svc_conn_age_period * HZ); | ||
1686 | } | ||
1687 | } else { | ||
1688 | clear_bit(SK_TEMP, &svsk->sk_flags); | ||
1689 | list_add(&svsk->sk_list, &serv->sv_permsocks); | ||
1690 | } | ||
1691 | spin_unlock_bh(&serv->sv_lock); | ||
1692 | 1147 | ||
1693 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", | 1148 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", |
1694 | svsk, svsk->sk_sk); | 1149 | svsk, svsk->sk_sk); |
@@ -1717,7 +1172,16 @@ int svc_addsock(struct svc_serv *serv, | |||
1717 | else { | 1172 | else { |
1718 | svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); | 1173 | svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); |
1719 | if (svsk) { | 1174 | if (svsk) { |
1720 | svc_sock_received(svsk); | 1175 | struct sockaddr_storage addr; |
1176 | struct sockaddr *sin = (struct sockaddr *)&addr; | ||
1177 | int salen; | ||
1178 | if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) | ||
1179 | svc_xprt_set_local(&svsk->sk_xprt, sin, salen); | ||
1180 | clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags); | ||
1181 | spin_lock_bh(&serv->sv_lock); | ||
1182 | list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks); | ||
1183 | spin_unlock_bh(&serv->sv_lock); | ||
1184 | svc_xprt_received(&svsk->sk_xprt); | ||
1721 | err = 0; | 1185 | err = 0; |
1722 | } | 1186 | } |
1723 | } | 1187 | } |
@@ -1733,14 +1197,19 @@ EXPORT_SYMBOL_GPL(svc_addsock); | |||
1733 | /* | 1197 | /* |
1734 | * Create socket for RPC service. | 1198 | * Create socket for RPC service. |
1735 | */ | 1199 | */ |
1736 | static int svc_create_socket(struct svc_serv *serv, int protocol, | 1200 | static struct svc_xprt *svc_create_socket(struct svc_serv *serv, |
1737 | struct sockaddr *sin, int len, int flags) | 1201 | int protocol, |
1202 | struct sockaddr *sin, int len, | ||
1203 | int flags) | ||
1738 | { | 1204 | { |
1739 | struct svc_sock *svsk; | 1205 | struct svc_sock *svsk; |
1740 | struct socket *sock; | 1206 | struct socket *sock; |
1741 | int error; | 1207 | int error; |
1742 | int type; | 1208 | int type; |
1743 | char buf[RPC_MAX_ADDRBUFLEN]; | 1209 | char buf[RPC_MAX_ADDRBUFLEN]; |
1210 | struct sockaddr_storage addr; | ||
1211 | struct sockaddr *newsin = (struct sockaddr *)&addr; | ||
1212 | int newlen; | ||
1744 | 1213 | ||
1745 | dprintk("svc: svc_create_socket(%s, %d, %s)\n", | 1214 | dprintk("svc: svc_create_socket(%s, %d, %s)\n", |
1746 | serv->sv_program->pg_name, protocol, | 1215 | serv->sv_program->pg_name, protocol, |
@@ -1749,13 +1218,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, | |||
1749 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { | 1218 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { |
1750 | printk(KERN_WARNING "svc: only UDP and TCP " | 1219 | printk(KERN_WARNING "svc: only UDP and TCP " |
1751 | "sockets supported\n"); | 1220 | "sockets supported\n"); |
1752 | return -EINVAL; | 1221 | return ERR_PTR(-EINVAL); |
1753 | } | 1222 | } |
1754 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | 1223 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; |
1755 | 1224 | ||
1756 | error = sock_create_kern(sin->sa_family, type, protocol, &sock); | 1225 | error = sock_create_kern(sin->sa_family, type, protocol, &sock); |
1757 | if (error < 0) | 1226 | if (error < 0) |
1758 | return error; | 1227 | return ERR_PTR(error); |
1759 | 1228 | ||
1760 | svc_reclassify_socket(sock); | 1229 | svc_reclassify_socket(sock); |
1761 | 1230 | ||
@@ -1765,203 +1234,55 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, | |||
1765 | if (error < 0) | 1234 | if (error < 0) |
1766 | goto bummer; | 1235 | goto bummer; |
1767 | 1236 | ||
1237 | newlen = len; | ||
1238 | error = kernel_getsockname(sock, newsin, &newlen); | ||
1239 | if (error < 0) | ||
1240 | goto bummer; | ||
1241 | |||
1768 | if (protocol == IPPROTO_TCP) { | 1242 | if (protocol == IPPROTO_TCP) { |
1769 | if ((error = kernel_listen(sock, 64)) < 0) | 1243 | if ((error = kernel_listen(sock, 64)) < 0) |
1770 | goto bummer; | 1244 | goto bummer; |
1771 | } | 1245 | } |
1772 | 1246 | ||
1773 | if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { | 1247 | if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { |
1774 | svc_sock_received(svsk); | 1248 | svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); |
1775 | return ntohs(inet_sk(svsk->sk_sk)->sport); | 1249 | return (struct svc_xprt *)svsk; |
1776 | } | 1250 | } |
1777 | 1251 | ||
1778 | bummer: | 1252 | bummer: |
1779 | dprintk("svc: svc_create_socket error = %d\n", -error); | 1253 | dprintk("svc: svc_create_socket error = %d\n", -error); |
1780 | sock_release(sock); | 1254 | sock_release(sock); |
1781 | return error; | 1255 | return ERR_PTR(error); |
1782 | } | 1256 | } |
1783 | 1257 | ||
1784 | /* | 1258 | /* |
1785 | * Remove a dead socket | 1259 | * Detach the svc_sock from the socket so that no |
1260 | * more callbacks occur. | ||
1786 | */ | 1261 | */ |
1787 | static void | 1262 | static void svc_sock_detach(struct svc_xprt *xprt) |
1788 | svc_delete_socket(struct svc_sock *svsk) | ||
1789 | { | 1263 | { |
1790 | struct svc_serv *serv; | 1264 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); |
1791 | struct sock *sk; | 1265 | struct sock *sk = svsk->sk_sk; |
1792 | |||
1793 | dprintk("svc: svc_delete_socket(%p)\n", svsk); | ||
1794 | 1266 | ||
1795 | serv = svsk->sk_server; | 1267 | dprintk("svc: svc_sock_detach(%p)\n", svsk); |
1796 | sk = svsk->sk_sk; | ||
1797 | 1268 | ||
1269 | /* put back the old socket callbacks */ | ||
1798 | sk->sk_state_change = svsk->sk_ostate; | 1270 | sk->sk_state_change = svsk->sk_ostate; |
1799 | sk->sk_data_ready = svsk->sk_odata; | 1271 | sk->sk_data_ready = svsk->sk_odata; |
1800 | sk->sk_write_space = svsk->sk_owspace; | 1272 | sk->sk_write_space = svsk->sk_owspace; |
1801 | |||
1802 | spin_lock_bh(&serv->sv_lock); | ||
1803 | |||
1804 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) | ||
1805 | list_del_init(&svsk->sk_list); | ||
1806 | /* | ||
1807 | * We used to delete the svc_sock from whichever list | ||
1808 | * it's sk_ready node was on, but we don't actually | ||
1809 | * need to. This is because the only time we're called | ||
1810 | * while still attached to a queue, the queue itself | ||
1811 | * is about to be destroyed (in svc_destroy). | ||
1812 | */ | ||
1813 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { | ||
1814 | BUG_ON(atomic_read(&svsk->sk_inuse)<2); | ||
1815 | atomic_dec(&svsk->sk_inuse); | ||
1816 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | ||
1817 | serv->sv_tmpcnt--; | ||
1818 | } | ||
1819 | |||
1820 | spin_unlock_bh(&serv->sv_lock); | ||
1821 | } | ||
1822 | |||
1823 | static void svc_close_socket(struct svc_sock *svsk) | ||
1824 | { | ||
1825 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1826 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) | ||
1827 | /* someone else will have to effect the close */ | ||
1828 | return; | ||
1829 | |||
1830 | atomic_inc(&svsk->sk_inuse); | ||
1831 | svc_delete_socket(svsk); | ||
1832 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
1833 | svc_sock_put(svsk); | ||
1834 | } | ||
1835 | |||
1836 | void svc_force_close_socket(struct svc_sock *svsk) | ||
1837 | { | ||
1838 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
1839 | if (test_bit(SK_BUSY, &svsk->sk_flags)) { | ||
1840 | /* Waiting to be processed, but no threads left, | ||
1841 | * So just remove it from the waiting list | ||
1842 | */ | ||
1843 | list_del_init(&svsk->sk_ready); | ||
1844 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
1845 | } | ||
1846 | svc_close_socket(svsk); | ||
1847 | } | ||
1848 | |||
1849 | /** | ||
1850 | * svc_makesock - Make a socket for nfsd and lockd | ||
1851 | * @serv: RPC server structure | ||
1852 | * @protocol: transport protocol to use | ||
1853 | * @port: port to use | ||
1854 | * @flags: requested socket characteristics | ||
1855 | * | ||
1856 | */ | ||
1857 | int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port, | ||
1858 | int flags) | ||
1859 | { | ||
1860 | struct sockaddr_in sin = { | ||
1861 | .sin_family = AF_INET, | ||
1862 | .sin_addr.s_addr = INADDR_ANY, | ||
1863 | .sin_port = htons(port), | ||
1864 | }; | ||
1865 | |||
1866 | dprintk("svc: creating socket proto = %d\n", protocol); | ||
1867 | return svc_create_socket(serv, protocol, (struct sockaddr *) &sin, | ||
1868 | sizeof(sin), flags); | ||
1869 | } | 1273 | } |
1870 | 1274 | ||
1871 | /* | 1275 | /* |
1872 | * Handle defer and revisit of requests | 1276 | * Free the svc_sock's socket resources and the svc_sock itself. |
1873 | */ | 1277 | */ |
1874 | 1278 | static void svc_sock_free(struct svc_xprt *xprt) | |
1875 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | ||
1876 | { | 1279 | { |
1877 | struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); | 1280 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); |
1878 | struct svc_sock *svsk; | 1281 | dprintk("svc: svc_sock_free(%p)\n", svsk); |
1879 | 1282 | ||
1880 | if (too_many) { | 1283 | if (svsk->sk_sock->file) |
1881 | svc_sock_put(dr->svsk); | 1284 | sockfd_put(svsk->sk_sock); |
1882 | kfree(dr); | 1285 | else |
1883 | return; | 1286 | sock_release(svsk->sk_sock); |
1884 | } | 1287 | kfree(svsk); |
1885 | dprintk("revisit queued\n"); | ||
1886 | svsk = dr->svsk; | ||
1887 | dr->svsk = NULL; | ||
1888 | spin_lock(&svsk->sk_lock); | ||
1889 | list_add(&dr->handle.recent, &svsk->sk_deferred); | ||
1890 | spin_unlock(&svsk->sk_lock); | ||
1891 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1892 | svc_sock_enqueue(svsk); | ||
1893 | svc_sock_put(svsk); | ||
1894 | } | ||
1895 | |||
1896 | static struct cache_deferred_req * | ||
1897 | svc_defer(struct cache_req *req) | ||
1898 | { | ||
1899 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | ||
1900 | int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len); | ||
1901 | struct svc_deferred_req *dr; | ||
1902 | |||
1903 | if (rqstp->rq_arg.page_len) | ||
1904 | return NULL; /* if more than a page, give up FIXME */ | ||
1905 | if (rqstp->rq_deferred) { | ||
1906 | dr = rqstp->rq_deferred; | ||
1907 | rqstp->rq_deferred = NULL; | ||
1908 | } else { | ||
1909 | int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | ||
1910 | /* FIXME maybe discard if size too large */ | ||
1911 | dr = kmalloc(size, GFP_KERNEL); | ||
1912 | if (dr == NULL) | ||
1913 | return NULL; | ||
1914 | |||
1915 | dr->handle.owner = rqstp->rq_server; | ||
1916 | dr->prot = rqstp->rq_prot; | ||
1917 | memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); | ||
1918 | dr->addrlen = rqstp->rq_addrlen; | ||
1919 | dr->daddr = rqstp->rq_daddr; | ||
1920 | dr->argslen = rqstp->rq_arg.len >> 2; | ||
1921 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); | ||
1922 | } | ||
1923 | atomic_inc(&rqstp->rq_sock->sk_inuse); | ||
1924 | dr->svsk = rqstp->rq_sock; | ||
1925 | |||
1926 | dr->handle.revisit = svc_revisit; | ||
1927 | return &dr->handle; | ||
1928 | } | ||
1929 | |||
1930 | /* | ||
1931 | * recv data from a deferred request into an active one | ||
1932 | */ | ||
1933 | static int svc_deferred_recv(struct svc_rqst *rqstp) | ||
1934 | { | ||
1935 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
1936 | |||
1937 | rqstp->rq_arg.head[0].iov_base = dr->args; | ||
1938 | rqstp->rq_arg.head[0].iov_len = dr->argslen<<2; | ||
1939 | rqstp->rq_arg.page_len = 0; | ||
1940 | rqstp->rq_arg.len = dr->argslen<<2; | ||
1941 | rqstp->rq_prot = dr->prot; | ||
1942 | memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); | ||
1943 | rqstp->rq_addrlen = dr->addrlen; | ||
1944 | rqstp->rq_daddr = dr->daddr; | ||
1945 | rqstp->rq_respages = rqstp->rq_pages; | ||
1946 | return dr->argslen<<2; | ||
1947 | } | ||
1948 | |||
1949 | |||
1950 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) | ||
1951 | { | ||
1952 | struct svc_deferred_req *dr = NULL; | ||
1953 | |||
1954 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) | ||
1955 | return NULL; | ||
1956 | spin_lock(&svsk->sk_lock); | ||
1957 | clear_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1958 | if (!list_empty(&svsk->sk_deferred)) { | ||
1959 | dr = list_entry(svsk->sk_deferred.next, | ||
1960 | struct svc_deferred_req, | ||
1961 | handle.recent); | ||
1962 | list_del_init(&dr->handle.recent); | ||
1963 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
1964 | } | ||
1965 | spin_unlock(&svsk->sk_lock); | ||
1966 | return dr; | ||
1967 | } | 1288 | } |
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index bada7de0c2fc..0f8c439b848a 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/sunrpc/types.h> | 18 | #include <linux/sunrpc/types.h> |
19 | #include <linux/sunrpc/sched.h> | 19 | #include <linux/sunrpc/sched.h> |
20 | #include <linux/sunrpc/stats.h> | 20 | #include <linux/sunrpc/stats.h> |
21 | #include <linux/sunrpc/svc_xprt.h> | ||
21 | 22 | ||
22 | /* | 23 | /* |
23 | * Declare the debug flags here | 24 | * Declare the debug flags here |
@@ -55,6 +56,30 @@ rpc_unregister_sysctl(void) | |||
55 | } | 56 | } |
56 | } | 57 | } |
57 | 58 | ||
59 | static int proc_do_xprt(ctl_table *table, int write, struct file *file, | ||
60 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
61 | { | ||
62 | char tmpbuf[256]; | ||
63 | int len; | ||
64 | if ((*ppos && !write) || !*lenp) { | ||
65 | *lenp = 0; | ||
66 | return 0; | ||
67 | } | ||
68 | if (write) | ||
69 | return -EINVAL; | ||
70 | else { | ||
71 | len = svc_print_xprts(tmpbuf, sizeof(tmpbuf)); | ||
72 | if (!access_ok(VERIFY_WRITE, buffer, len)) | ||
73 | return -EFAULT; | ||
74 | |||
75 | if (__copy_to_user(buffer, tmpbuf, len)) | ||
76 | return -EFAULT; | ||
77 | } | ||
78 | *lenp -= len; | ||
79 | *ppos += len; | ||
80 | return 0; | ||
81 | } | ||
82 | |||
58 | static int | 83 | static int |
59 | proc_dodebug(ctl_table *table, int write, struct file *file, | 84 | proc_dodebug(ctl_table *table, int write, struct file *file, |
60 | void __user *buffer, size_t *lenp, loff_t *ppos) | 85 | void __user *buffer, size_t *lenp, loff_t *ppos) |
@@ -147,6 +172,12 @@ static ctl_table debug_table[] = { | |||
147 | .mode = 0644, | 172 | .mode = 0644, |
148 | .proc_handler = &proc_dodebug | 173 | .proc_handler = &proc_dodebug |
149 | }, | 174 | }, |
175 | { | ||
176 | .procname = "transports", | ||
177 | .maxlen = 256, | ||
178 | .mode = 0444, | ||
179 | .proc_handler = &proc_do_xprt, | ||
180 | }, | ||
150 | { .ctl_name = 0 } | 181 | { .ctl_name = 0 } |
151 | }; | 182 | }; |
152 | 183 | ||
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 54264062ea69..995c3fdc16c2 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
@@ -96,11 +96,13 @@ xdr_encode_string(__be32 *p, const char *string) | |||
96 | EXPORT_SYMBOL(xdr_encode_string); | 96 | EXPORT_SYMBOL(xdr_encode_string); |
97 | 97 | ||
98 | __be32 * | 98 | __be32 * |
99 | xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) | 99 | xdr_decode_string_inplace(__be32 *p, char **sp, |
100 | unsigned int *lenp, unsigned int maxlen) | ||
100 | { | 101 | { |
101 | unsigned int len; | 102 | u32 len; |
102 | 103 | ||
103 | if ((len = ntohl(*p++)) > maxlen) | 104 | len = ntohl(*p++); |
105 | if (len > maxlen) | ||
104 | return NULL; | 106 | return NULL; |
105 | *lenp = len; | 107 | *lenp = len; |
106 | *sp = (char *) p; | 108 | *sp = (char *) p; |
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 264f0feeb513..5a8f268bdd30 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
@@ -1,3 +1,8 @@ | |||
1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o |
2 | 2 | ||
3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o |
4 | |||
5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o | ||
6 | |||
7 | svcrdma-y := svc_rdma.o svc_rdma_transport.o \ | ||
8 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c new file mode 100644 index 000000000000..88c0ca20bb1e --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma.c | |||
@@ -0,0 +1,266 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | #include <linux/module.h> | ||
42 | #include <linux/init.h> | ||
43 | #include <linux/fs.h> | ||
44 | #include <linux/sysctl.h> | ||
45 | #include <linux/sunrpc/clnt.h> | ||
46 | #include <linux/sunrpc/sched.h> | ||
47 | #include <linux/sunrpc/svc_rdma.h> | ||
48 | |||
49 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
50 | |||
/* RPC/RDMA parameters */
/*
 * Each tunable is followed by the lower/upper bound pair that the
 * sysctl table in this file passes as extra1/extra2 for it.
 */
unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
static unsigned int min_max_inline = 4096;
static unsigned int max_max_inline = 65536;

/*
 * Statistics counters, exported through the rdma_stat_* sysctl entries.
 * They are only read and reset in this file; the code that increments
 * them is not visible here.
 */
atomic_t rdma_stat_recv;
atomic_t rdma_stat_read;
atomic_t rdma_stat_write;
atomic_t rdma_stat_sq_starve;
atomic_t rdma_stat_rq_starve;
atomic_t rdma_stat_rq_poll;
atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
71 | |||
72 | /* | ||
73 | * This function implements reading and resetting an atomic_t stat | ||
74 | * variable through read/write to a proc file. Any write to the file | ||
75 | * resets the associated statistic to zero. Any read returns it's | ||
76 | * current value. | ||
77 | */ | ||
78 | static int read_reset_stat(ctl_table *table, int write, | ||
79 | struct file *filp, void __user *buffer, size_t *lenp, | ||
80 | loff_t *ppos) | ||
81 | { | ||
82 | atomic_t *stat = (atomic_t *)table->data; | ||
83 | |||
84 | if (!stat) | ||
85 | return -EINVAL; | ||
86 | |||
87 | if (write) | ||
88 | atomic_set(stat, 0); | ||
89 | else { | ||
90 | char str_buf[32]; | ||
91 | char *data; | ||
92 | int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat)); | ||
93 | if (len >= 32) | ||
94 | return -EFAULT; | ||
95 | len = strlen(str_buf); | ||
96 | if (*ppos > len) { | ||
97 | *lenp = 0; | ||
98 | return 0; | ||
99 | } | ||
100 | data = &str_buf[*ppos]; | ||
101 | len -= *ppos; | ||
102 | if (len > *lenp) | ||
103 | len = *lenp; | ||
104 | if (len && copy_to_user(buffer, str_buf, len)) | ||
105 | return -EFAULT; | ||
106 | *lenp = len; | ||
107 | *ppos += len; | ||
108 | } | ||
109 | return 0; | ||
110 | } | ||
111 | |||
/* Handle returned by register_sysctl_table(); NULL while unregistered. */
static struct ctl_table_header *svcrdma_table_header;

/*
 * Leaf entries of the svc_rdma sysctl directory.
 *
 * The first three entries are integer tunables handled by
 * proc_dointvec_minmax, with their bounds supplied through
 * extra1 (minimum) and extra2 (maximum).  The remaining entries expose
 * the atomic_t statistics counters through read_reset_stat: a read
 * returns the current value, any write resets the counter to zero.
 */
static ctl_table svcrdma_parm_table[] = {
	{
		.procname = "max_requests",
		.data = &svcrdma_max_requests,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = &proc_dointvec_minmax,
		.strategy = &sysctl_intvec,
		.extra1 = &min_max_requests,
		.extra2 = &max_max_requests
	},
	{
		.procname = "max_req_size",
		.data = &svcrdma_max_req_size,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = &proc_dointvec_minmax,
		.strategy = &sysctl_intvec,
		.extra1 = &min_max_inline,
		.extra2 = &max_max_inline
	},
	{
		.procname = "max_outbound_read_requests",
		.data = &svcrdma_ord,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = &proc_dointvec_minmax,
		.strategy = &sysctl_intvec,
		.extra1 = &min_ord,
		.extra2 = &max_ord,
	},

	/* statistics counters: read to sample, write to reset */
	{
		.procname = "rdma_stat_read",
		.data = &rdma_stat_read,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	{
		.procname = "rdma_stat_recv",
		.data = &rdma_stat_recv,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	{
		.procname = "rdma_stat_write",
		.data = &rdma_stat_write,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	{
		.procname = "rdma_stat_sq_starve",
		.data = &rdma_stat_sq_starve,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	{
		.procname = "rdma_stat_rq_starve",
		.data = &rdma_stat_rq_starve,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	{
		.procname = "rdma_stat_rq_poll",
		.data = &rdma_stat_rq_poll,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	{
		.procname = "rdma_stat_rq_prod",
		.data = &rdma_stat_rq_prod,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	{
		.procname = "rdma_stat_sq_poll",
		.data = &rdma_stat_sq_poll,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	{
		.procname = "rdma_stat_sq_prod",
		.data = &rdma_stat_sq_prod,
		.maxlen = sizeof(atomic_t),
		.mode = 0644,
		.proc_handler = &read_reset_stat,
	},
	/* table terminator */
	{
		.ctl_name = 0,
	},
};
212 | |||
/* The "svc_rdma" directory, whose children are the entries above. */
static ctl_table svcrdma_table[] = {
	{
		.procname = "svc_rdma",
		.mode = 0555,
		.child = svcrdma_parm_table
	},
	/* table terminator */
	{
		.ctl_name = 0,
	},
};
223 | |||
/*
 * Root of the tree handed to register_sysctl_table(): the "sunrpc"
 * directory containing the "svc_rdma" directory.
 */
static ctl_table svcrdma_root_table[] = {
	{
		.ctl_name = CTL_SUNRPC,
		.procname = "sunrpc",
		.mode = 0555,
		.child = svcrdma_table
	},
	/* table terminator */
	{
		.ctl_name = 0,
	},
};
235 | |||
236 | void svc_rdma_cleanup(void) | ||
237 | { | ||
238 | dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); | ||
239 | if (svcrdma_table_header) { | ||
240 | unregister_sysctl_table(svcrdma_table_header); | ||
241 | svcrdma_table_header = NULL; | ||
242 | } | ||
243 | svc_unreg_xprt_class(&svc_rdma_class); | ||
244 | } | ||
245 | |||
246 | int svc_rdma_init(void) | ||
247 | { | ||
248 | dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); | ||
249 | dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); | ||
250 | dprintk("\tmax_requests : %d\n", svcrdma_max_requests); | ||
251 | dprintk("\tsq_depth : %d\n", | ||
252 | svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); | ||
253 | dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); | ||
254 | if (!svcrdma_table_header) | ||
255 | svcrdma_table_header = | ||
256 | register_sysctl_table(svcrdma_root_table); | ||
257 | |||
258 | /* Register RDMA with the SVC transport switch */ | ||
259 | svc_reg_xprt_class(&svc_rdma_class); | ||
260 | return 0; | ||
261 | } | ||
262 | MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); | ||
263 | MODULE_DESCRIPTION("SVC RDMA Transport"); | ||
264 | MODULE_LICENSE("Dual BSD/GPL"); | ||
265 | module_init(svc_rdma_init); | ||
266 | module_exit(svc_rdma_cleanup); | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c new file mode 100644 index 000000000000..9530ef2d40dc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c | |||
@@ -0,0 +1,412 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/xdr.h> | ||
43 | #include <linux/sunrpc/debug.h> | ||
44 | #include <asm/unaligned.h> | ||
45 | #include <linux/sunrpc/rpc_rdma.h> | ||
46 | #include <linux/sunrpc/svc_rdma.h> | ||
47 | |||
48 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
49 | |||
50 | /* | ||
51 | * Decodes a read chunk list. The expected format is as follows: | ||
52 | * descrim : xdr_one | ||
53 | * position : u32 offset into XDR stream | ||
54 | * handle : u32 RKEY | ||
55 | * . . . | ||
56 | * end-of-list: xdr_zero | ||
57 | */ | ||
58 | static u32 *decode_read_list(u32 *va, u32 *vaend) | ||
59 | { | ||
60 | struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; | ||
61 | |||
62 | while (ch->rc_discrim != xdr_zero) { | ||
63 | u64 ch_offset; | ||
64 | |||
65 | if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > | ||
66 | (unsigned long)vaend) { | ||
67 | dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
71 | ch->rc_discrim = ntohl(ch->rc_discrim); | ||
72 | ch->rc_position = ntohl(ch->rc_position); | ||
73 | ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle); | ||
74 | ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length); | ||
75 | va = (u32 *)&ch->rc_target.rs_offset; | ||
76 | xdr_decode_hyper(va, &ch_offset); | ||
77 | put_unaligned(ch_offset, (u64 *)va); | ||
78 | ch++; | ||
79 | } | ||
80 | return (u32 *)&ch->rc_position; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Determine number of chunks and total bytes in chunk list. The chunk | ||
85 | * list has already been verified to fit within the RPCRDMA header. | ||
86 | */ | ||
87 | void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, | ||
88 | int *ch_count, int *byte_count) | ||
89 | { | ||
90 | /* compute the number of bytes represented by read chunks */ | ||
91 | *byte_count = 0; | ||
92 | *ch_count = 0; | ||
93 | for (; ch->rc_discrim != 0; ch++) { | ||
94 | *byte_count = *byte_count + ch->rc_target.rs_length; | ||
95 | *ch_count = *ch_count + 1; | ||
96 | } | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Decodes a write chunk list. The expected format is as follows: | ||
101 | * descrim : xdr_one | ||
102 | * nchunks : <count> | ||
103 | * handle : u32 RKEY ---+ | ||
104 | * length : u32 <len of segment> | | ||
105 | * offset : remove va + <count> | ||
106 | * . . . | | ||
107 | * ---+ | ||
108 | */ | ||
109 | static u32 *decode_write_list(u32 *va, u32 *vaend) | ||
110 | { | ||
111 | int ch_no; | ||
112 | struct rpcrdma_write_array *ary = | ||
113 | (struct rpcrdma_write_array *)va; | ||
114 | |||
115 | /* Check for not write-array */ | ||
116 | if (ary->wc_discrim == xdr_zero) | ||
117 | return (u32 *)&ary->wc_nchunks; | ||
118 | |||
119 | if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > | ||
120 | (unsigned long)vaend) { | ||
121 | dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); | ||
122 | return NULL; | ||
123 | } | ||
124 | ary->wc_discrim = ntohl(ary->wc_discrim); | ||
125 | ary->wc_nchunks = ntohl(ary->wc_nchunks); | ||
126 | if (((unsigned long)&ary->wc_array[0] + | ||
127 | (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > | ||
128 | (unsigned long)vaend) { | ||
129 | dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", | ||
130 | ary, ary->wc_nchunks, vaend); | ||
131 | return NULL; | ||
132 | } | ||
133 | for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { | ||
134 | u64 ch_offset; | ||
135 | |||
136 | ary->wc_array[ch_no].wc_target.rs_handle = | ||
137 | ntohl(ary->wc_array[ch_no].wc_target.rs_handle); | ||
138 | ary->wc_array[ch_no].wc_target.rs_length = | ||
139 | ntohl(ary->wc_array[ch_no].wc_target.rs_length); | ||
140 | va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; | ||
141 | xdr_decode_hyper(va, &ch_offset); | ||
142 | put_unaligned(ch_offset, (u64 *)va); | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * rs_length is the 2nd 4B field in wc_target and taking its | ||
147 | * address skips the list terminator | ||
148 | */ | ||
149 | return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length; | ||
150 | } | ||
151 | |||
152 | static u32 *decode_reply_array(u32 *va, u32 *vaend) | ||
153 | { | ||
154 | int ch_no; | ||
155 | struct rpcrdma_write_array *ary = | ||
156 | (struct rpcrdma_write_array *)va; | ||
157 | |||
158 | /* Check for no reply-array */ | ||
159 | if (ary->wc_discrim == xdr_zero) | ||
160 | return (u32 *)&ary->wc_nchunks; | ||
161 | |||
162 | if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > | ||
163 | (unsigned long)vaend) { | ||
164 | dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); | ||
165 | return NULL; | ||
166 | } | ||
167 | ary->wc_discrim = ntohl(ary->wc_discrim); | ||
168 | ary->wc_nchunks = ntohl(ary->wc_nchunks); | ||
169 | if (((unsigned long)&ary->wc_array[0] + | ||
170 | (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > | ||
171 | (unsigned long)vaend) { | ||
172 | dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", | ||
173 | ary, ary->wc_nchunks, vaend); | ||
174 | return NULL; | ||
175 | } | ||
176 | for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { | ||
177 | u64 ch_offset; | ||
178 | |||
179 | ary->wc_array[ch_no].wc_target.rs_handle = | ||
180 | ntohl(ary->wc_array[ch_no].wc_target.rs_handle); | ||
181 | ary->wc_array[ch_no].wc_target.rs_length = | ||
182 | ntohl(ary->wc_array[ch_no].wc_target.rs_length); | ||
183 | va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; | ||
184 | xdr_decode_hyper(va, &ch_offset); | ||
185 | put_unaligned(ch_offset, (u64 *)va); | ||
186 | } | ||
187 | |||
188 | return (u32 *)&ary->wc_array[ch_no]; | ||
189 | } | ||
190 | |||
191 | int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, | ||
192 | struct svc_rqst *rqstp) | ||
193 | { | ||
194 | struct rpcrdma_msg *rmsgp = NULL; | ||
195 | u32 *va; | ||
196 | u32 *vaend; | ||
197 | u32 hdr_len; | ||
198 | |||
199 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | ||
200 | |||
201 | /* Verify that there's enough bytes for header + something */ | ||
202 | if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { | ||
203 | dprintk("svcrdma: header too short = %d\n", | ||
204 | rqstp->rq_arg.len); | ||
205 | return -EINVAL; | ||
206 | } | ||
207 | |||
208 | /* Decode the header */ | ||
209 | rmsgp->rm_xid = ntohl(rmsgp->rm_xid); | ||
210 | rmsgp->rm_vers = ntohl(rmsgp->rm_vers); | ||
211 | rmsgp->rm_credit = ntohl(rmsgp->rm_credit); | ||
212 | rmsgp->rm_type = ntohl(rmsgp->rm_type); | ||
213 | |||
214 | if (rmsgp->rm_vers != RPCRDMA_VERSION) | ||
215 | return -ENOSYS; | ||
216 | |||
217 | /* Pull in the extra for the padded case and bump our pointer */ | ||
218 | if (rmsgp->rm_type == RDMA_MSGP) { | ||
219 | int hdrlen; | ||
220 | rmsgp->rm_body.rm_padded.rm_align = | ||
221 | ntohl(rmsgp->rm_body.rm_padded.rm_align); | ||
222 | rmsgp->rm_body.rm_padded.rm_thresh = | ||
223 | ntohl(rmsgp->rm_body.rm_padded.rm_thresh); | ||
224 | |||
225 | va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; | ||
226 | rqstp->rq_arg.head[0].iov_base = va; | ||
227 | hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); | ||
228 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
229 | if (hdrlen > rqstp->rq_arg.len) | ||
230 | return -EINVAL; | ||
231 | return hdrlen; | ||
232 | } | ||
233 | |||
234 | /* The chunk list may contain either a read chunk list or a write | ||
235 | * chunk list and a reply chunk list. | ||
236 | */ | ||
237 | va = &rmsgp->rm_body.rm_chunks[0]; | ||
238 | vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); | ||
239 | va = decode_read_list(va, vaend); | ||
240 | if (!va) | ||
241 | return -EINVAL; | ||
242 | va = decode_write_list(va, vaend); | ||
243 | if (!va) | ||
244 | return -EINVAL; | ||
245 | va = decode_reply_array(va, vaend); | ||
246 | if (!va) | ||
247 | return -EINVAL; | ||
248 | |||
249 | rqstp->rq_arg.head[0].iov_base = va; | ||
250 | hdr_len = (unsigned long)va - (unsigned long)rmsgp; | ||
251 | rqstp->rq_arg.head[0].iov_len -= hdr_len; | ||
252 | |||
253 | *rdma_req = rmsgp; | ||
254 | return hdr_len; | ||
255 | } | ||
256 | |||
257 | int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp) | ||
258 | { | ||
259 | struct rpcrdma_msg *rmsgp = NULL; | ||
260 | struct rpcrdma_read_chunk *ch; | ||
261 | struct rpcrdma_write_array *ary; | ||
262 | u32 *va; | ||
263 | u32 hdrlen; | ||
264 | |||
265 | dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n", | ||
266 | rqstp); | ||
267 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | ||
268 | |||
269 | /* Pull in the extra for the padded case and bump our pointer */ | ||
270 | if (rmsgp->rm_type == RDMA_MSGP) { | ||
271 | va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; | ||
272 | rqstp->rq_arg.head[0].iov_base = va; | ||
273 | hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); | ||
274 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
275 | return hdrlen; | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * Skip all chunks to find RPC msg. These were previously processed | ||
280 | */ | ||
281 | va = &rmsgp->rm_body.rm_chunks[0]; | ||
282 | |||
283 | /* Skip read-list */ | ||
284 | for (ch = (struct rpcrdma_read_chunk *)va; | ||
285 | ch->rc_discrim != xdr_zero; ch++); | ||
286 | va = (u32 *)&ch->rc_position; | ||
287 | |||
288 | /* Skip write-list */ | ||
289 | ary = (struct rpcrdma_write_array *)va; | ||
290 | if (ary->wc_discrim == xdr_zero) | ||
291 | va = (u32 *)&ary->wc_nchunks; | ||
292 | else | ||
293 | /* | ||
294 | * rs_length is the 2nd 4B field in wc_target and taking its | ||
295 | * address skips the list terminator | ||
296 | */ | ||
297 | va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length; | ||
298 | |||
299 | /* Skip reply-array */ | ||
300 | ary = (struct rpcrdma_write_array *)va; | ||
301 | if (ary->wc_discrim == xdr_zero) | ||
302 | va = (u32 *)&ary->wc_nchunks; | ||
303 | else | ||
304 | va = (u32 *)&ary->wc_array[ary->wc_nchunks]; | ||
305 | |||
306 | rqstp->rq_arg.head[0].iov_base = va; | ||
307 | hdrlen = (unsigned long)va - (unsigned long)rmsgp; | ||
308 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
309 | |||
310 | return hdrlen; | ||
311 | } | ||
312 | |||
313 | int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, | ||
314 | struct rpcrdma_msg *rmsgp, | ||
315 | enum rpcrdma_errcode err, u32 *va) | ||
316 | { | ||
317 | u32 *startp = va; | ||
318 | |||
319 | *va++ = htonl(rmsgp->rm_xid); | ||
320 | *va++ = htonl(rmsgp->rm_vers); | ||
321 | *va++ = htonl(xprt->sc_max_requests); | ||
322 | *va++ = htonl(RDMA_ERROR); | ||
323 | *va++ = htonl(err); | ||
324 | if (err == ERR_VERS) { | ||
325 | *va++ = htonl(RPCRDMA_VERSION); | ||
326 | *va++ = htonl(RPCRDMA_VERSION); | ||
327 | } | ||
328 | |||
329 | return (int)((unsigned long)va - (unsigned long)startp); | ||
330 | } | ||
331 | |||
332 | int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp) | ||
333 | { | ||
334 | struct rpcrdma_write_array *wr_ary; | ||
335 | |||
336 | /* There is no read-list in a reply */ | ||
337 | |||
338 | /* skip write list */ | ||
339 | wr_ary = (struct rpcrdma_write_array *) | ||
340 | &rmsgp->rm_body.rm_chunks[1]; | ||
341 | if (wr_ary->wc_discrim) | ||
342 | wr_ary = (struct rpcrdma_write_array *) | ||
343 | &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]. | ||
344 | wc_target.rs_length; | ||
345 | else | ||
346 | wr_ary = (struct rpcrdma_write_array *) | ||
347 | &wr_ary->wc_nchunks; | ||
348 | |||
349 | /* skip reply array */ | ||
350 | if (wr_ary->wc_discrim) | ||
351 | wr_ary = (struct rpcrdma_write_array *) | ||
352 | &wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)]; | ||
353 | else | ||
354 | wr_ary = (struct rpcrdma_write_array *) | ||
355 | &wr_ary->wc_nchunks; | ||
356 | |||
357 | return (unsigned long) wr_ary - (unsigned long) rmsgp; | ||
358 | } | ||
359 | |||
360 | void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) | ||
361 | { | ||
362 | struct rpcrdma_write_array *ary; | ||
363 | |||
364 | /* no read-list */ | ||
365 | rmsgp->rm_body.rm_chunks[0] = xdr_zero; | ||
366 | |||
367 | /* write-array discrim */ | ||
368 | ary = (struct rpcrdma_write_array *) | ||
369 | &rmsgp->rm_body.rm_chunks[1]; | ||
370 | ary->wc_discrim = xdr_one; | ||
371 | ary->wc_nchunks = htonl(chunks); | ||
372 | |||
373 | /* write-list terminator */ | ||
374 | ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; | ||
375 | |||
376 | /* reply-array discriminator */ | ||
377 | ary->wc_array[chunks].wc_target.rs_length = xdr_zero; | ||
378 | } | ||
379 | |||
380 | void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, | ||
381 | int chunks) | ||
382 | { | ||
383 | ary->wc_discrim = xdr_one; | ||
384 | ary->wc_nchunks = htonl(chunks); | ||
385 | } | ||
386 | |||
387 | void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, | ||
388 | int chunk_no, | ||
389 | u32 rs_handle, u64 rs_offset, | ||
390 | u32 write_len) | ||
391 | { | ||
392 | struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; | ||
393 | seg->rs_handle = htonl(rs_handle); | ||
394 | seg->rs_length = htonl(write_len); | ||
395 | xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset); | ||
396 | } | ||
397 | |||
398 | void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, | ||
399 | struct rpcrdma_msg *rdma_argp, | ||
400 | struct rpcrdma_msg *rdma_resp, | ||
401 | enum rpcrdma_proc rdma_type) | ||
402 | { | ||
403 | rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); | ||
404 | rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); | ||
405 | rdma_resp->rm_credit = htonl(xprt->sc_max_requests); | ||
406 | rdma_resp->rm_type = htonl(rdma_type); | ||
407 | |||
408 | /* Encode <nul> chunks lists */ | ||
409 | rdma_resp->rm_body.rm_chunks[0] = xdr_zero; | ||
410 | rdma_resp->rm_body.rm_chunks[1] = xdr_zero; | ||
411 | rdma_resp->rm_body.rm_chunks[2] = xdr_zero; | ||
412 | } | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c new file mode 100644 index 000000000000..ab54a736486e --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
@@ -0,0 +1,586 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/debug.h> | ||
43 | #include <linux/sunrpc/rpc_rdma.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | #include <asm/unaligned.h> | ||
46 | #include <rdma/ib_verbs.h> | ||
47 | #include <rdma/rdma_cm.h> | ||
48 | #include <linux/sunrpc/svc_rdma.h> | ||
49 | |||
50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
51 | |||
52 | /* | ||
53 | * Replace the pages in the rq_argpages array with the pages from the SGE in | ||
54 | * the RDMA_RECV completion. The SGL should contain full pages up until the | ||
55 | * last one. | ||
56 | */ | ||
57 | static void rdma_build_arg_xdr(struct svc_rqst *rqstp, | ||
58 | struct svc_rdma_op_ctxt *ctxt, | ||
59 | u32 byte_count) | ||
60 | { | ||
61 | struct page *page; | ||
62 | u32 bc; | ||
63 | int sge_no; | ||
64 | |||
65 | /* Swap the page in the SGE with the page in argpages */ | ||
66 | page = ctxt->pages[0]; | ||
67 | put_page(rqstp->rq_pages[0]); | ||
68 | rqstp->rq_pages[0] = page; | ||
69 | |||
70 | /* Set up the XDR head */ | ||
71 | rqstp->rq_arg.head[0].iov_base = page_address(page); | ||
72 | rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length); | ||
73 | rqstp->rq_arg.len = byte_count; | ||
74 | rqstp->rq_arg.buflen = byte_count; | ||
75 | |||
76 | /* Compute bytes past head in the SGL */ | ||
77 | bc = byte_count - rqstp->rq_arg.head[0].iov_len; | ||
78 | |||
79 | /* If data remains, store it in the pagelist */ | ||
80 | rqstp->rq_arg.page_len = bc; | ||
81 | rqstp->rq_arg.page_base = 0; | ||
82 | rqstp->rq_arg.pages = &rqstp->rq_pages[1]; | ||
83 | sge_no = 1; | ||
84 | while (bc && sge_no < ctxt->count) { | ||
85 | page = ctxt->pages[sge_no]; | ||
86 | put_page(rqstp->rq_pages[sge_no]); | ||
87 | rqstp->rq_pages[sge_no] = page; | ||
88 | bc -= min(bc, ctxt->sge[sge_no].length); | ||
89 | rqstp->rq_arg.buflen += ctxt->sge[sge_no].length; | ||
90 | sge_no++; | ||
91 | } | ||
92 | rqstp->rq_respages = &rqstp->rq_pages[sge_no]; | ||
93 | |||
94 | /* We should never run out of SGE because the limit is defined to | ||
95 | * support the max allowed RPC data length | ||
96 | */ | ||
97 | BUG_ON(bc && (sge_no == ctxt->count)); | ||
98 | BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len) | ||
99 | != byte_count); | ||
100 | BUG_ON(rqstp->rq_arg.len != byte_count); | ||
101 | |||
102 | /* If not all pages were used from the SGL, free the remaining ones */ | ||
103 | bc = sge_no; | ||
104 | while (sge_no < ctxt->count) { | ||
105 | page = ctxt->pages[sge_no++]; | ||
106 | put_page(page); | ||
107 | } | ||
108 | ctxt->count = bc; | ||
109 | |||
110 | /* Set up tail */ | ||
111 | rqstp->rq_arg.tail[0].iov_base = NULL; | ||
112 | rqstp->rq_arg.tail[0].iov_len = 0; | ||
113 | } | ||
114 | |||
/* Range of SGE array entries that belong to one read chunk */
struct chunk_sge {
	int start;		/* index of the chunk's first sge */
	int count;		/* number of sge used by the chunk */
};
119 | |||
120 | /* Encode a read-chunk-list as an array of IB SGE | ||
121 | * | ||
122 | * Assumptions: | ||
123 | * - chunk[0]->position points to pages[0] at an offset of 0 | ||
124 | * - pages[] is not physically or virtually contigous and consists of | ||
125 | * PAGE_SIZE elements. | ||
126 | * | ||
127 | * Output: | ||
128 | * - sge array pointing into pages[] array. | ||
129 | * - chunk_sge array specifying sge index and count for each | ||
130 | * chunk in the read list | ||
131 | * | ||
132 | */ | ||
133 | static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, | ||
134 | struct svc_rqst *rqstp, | ||
135 | struct svc_rdma_op_ctxt *head, | ||
136 | struct rpcrdma_msg *rmsgp, | ||
137 | struct ib_sge *sge, | ||
138 | struct chunk_sge *ch_sge_ary, | ||
139 | int ch_count, | ||
140 | int byte_count) | ||
141 | { | ||
142 | int sge_no; | ||
143 | int sge_bytes; | ||
144 | int page_off; | ||
145 | int page_no; | ||
146 | int ch_bytes; | ||
147 | int ch_no; | ||
148 | struct rpcrdma_read_chunk *ch; | ||
149 | |||
150 | sge_no = 0; | ||
151 | page_no = 0; | ||
152 | page_off = 0; | ||
153 | ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
154 | ch_no = 0; | ||
155 | ch_bytes = ch->rc_target.rs_length; | ||
156 | head->arg.head[0] = rqstp->rq_arg.head[0]; | ||
157 | head->arg.tail[0] = rqstp->rq_arg.tail[0]; | ||
158 | head->arg.pages = &head->pages[head->count]; | ||
159 | head->sge[0].length = head->count; /* save count of hdr pages */ | ||
160 | head->arg.page_base = 0; | ||
161 | head->arg.page_len = ch_bytes; | ||
162 | head->arg.len = rqstp->rq_arg.len + ch_bytes; | ||
163 | head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes; | ||
164 | head->count++; | ||
165 | ch_sge_ary[0].start = 0; | ||
166 | while (byte_count) { | ||
167 | sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes); | ||
168 | sge[sge_no].addr = | ||
169 | ib_dma_map_page(xprt->sc_cm_id->device, | ||
170 | rqstp->rq_arg.pages[page_no], | ||
171 | page_off, sge_bytes, | ||
172 | DMA_FROM_DEVICE); | ||
173 | sge[sge_no].length = sge_bytes; | ||
174 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
175 | /* | ||
176 | * Don't bump head->count here because the same page | ||
177 | * may be used by multiple SGE. | ||
178 | */ | ||
179 | head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; | ||
180 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1]; | ||
181 | |||
182 | byte_count -= sge_bytes; | ||
183 | ch_bytes -= sge_bytes; | ||
184 | sge_no++; | ||
185 | /* | ||
186 | * If all bytes for this chunk have been mapped to an | ||
187 | * SGE, move to the next SGE | ||
188 | */ | ||
189 | if (ch_bytes == 0) { | ||
190 | ch_sge_ary[ch_no].count = | ||
191 | sge_no - ch_sge_ary[ch_no].start; | ||
192 | ch_no++; | ||
193 | ch++; | ||
194 | ch_sge_ary[ch_no].start = sge_no; | ||
195 | ch_bytes = ch->rc_target.rs_length; | ||
196 | /* If bytes remaining account for next chunk */ | ||
197 | if (byte_count) { | ||
198 | head->arg.page_len += ch_bytes; | ||
199 | head->arg.len += ch_bytes; | ||
200 | head->arg.buflen += ch_bytes; | ||
201 | } | ||
202 | } | ||
203 | /* | ||
204 | * If this SGE consumed all of the page, move to the | ||
205 | * next page | ||
206 | */ | ||
207 | if ((sge_bytes + page_off) == PAGE_SIZE) { | ||
208 | page_no++; | ||
209 | page_off = 0; | ||
210 | /* | ||
211 | * If there are still bytes left to map, bump | ||
212 | * the page count | ||
213 | */ | ||
214 | if (byte_count) | ||
215 | head->count++; | ||
216 | } else | ||
217 | page_off += sge_bytes; | ||
218 | } | ||
219 | BUG_ON(byte_count != 0); | ||
220 | return sge_no; | ||
221 | } | ||
222 | |||
223 | static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, | ||
224 | struct ib_sge *sge, | ||
225 | u64 *sgl_offset, | ||
226 | int count) | ||
227 | { | ||
228 | int i; | ||
229 | |||
230 | ctxt->count = count; | ||
231 | for (i = 0; i < count; i++) { | ||
232 | ctxt->sge[i].addr = sge[i].addr; | ||
233 | ctxt->sge[i].length = sge[i].length; | ||
234 | *sgl_offset = *sgl_offset + sge[i].length; | ||
235 | } | ||
236 | } | ||
237 | |||
238 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) | ||
239 | { | ||
240 | #ifdef RDMA_TRANSPORT_IWARP | ||
241 | if ((RDMA_TRANSPORT_IWARP == | ||
242 | rdma_node_get_transport(xprt->sc_cm_id-> | ||
243 | device->node_type)) | ||
244 | && sge_count > 1) | ||
245 | return 1; | ||
246 | else | ||
247 | #endif | ||
248 | return min_t(int, sge_count, xprt->sc_max_sge); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Use RDMA_READ to read data from the advertised client buffer into the | ||
253 | * XDR stream starting at rq_arg.head[0].iov_base. | ||
254 | * Each chunk in the array | ||
255 | * contains the following fields: | ||
256 | * discrim - '1', This isn't used for data placement | ||
257 | * position - The xdr stream offset (the same for every chunk) | ||
258 | * handle - RMR for client memory region | ||
259 | * length - data transfer length | ||
260 | * offset - 64 bit tagged offset in remote memory region | ||
261 | * | ||
262 | * On our side, we need to read into a pagelist. The first page immediately | ||
263 | * follows the RPC header. | ||
264 | * | ||
265 | * This function returns 1 to indicate success. The data is not yet in | ||
266 | * the pagelist and therefore the RPC request must be deferred. The | ||
267 | * I/O completion will enqueue the transport again and | ||
268 | * svc_rdma_recvfrom will complete the request. | ||
269 | * | ||
270 | * NOTE: The ctxt must not be touched after the last WR has been posted | ||
271 | * because the I/O completion processing may occur on another | ||
272 | * processor and free / modify the context. Ne touche pas! | ||
273 | */ | ||
274 | static int rdma_read_xdr(struct svcxprt_rdma *xprt, | ||
275 | struct rpcrdma_msg *rmsgp, | ||
276 | struct svc_rqst *rqstp, | ||
277 | struct svc_rdma_op_ctxt *hdr_ctxt) | ||
278 | { | ||
279 | struct ib_send_wr read_wr; | ||
280 | int err = 0; | ||
281 | int ch_no; | ||
282 | struct ib_sge *sge; | ||
283 | int ch_count; | ||
284 | int byte_count; | ||
285 | int sge_count; | ||
286 | u64 sgl_offset; | ||
287 | struct rpcrdma_read_chunk *ch; | ||
288 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
289 | struct svc_rdma_op_ctxt *head; | ||
290 | struct svc_rdma_op_ctxt *tmp_sge_ctxt; | ||
291 | struct svc_rdma_op_ctxt *tmp_ch_ctxt; | ||
292 | struct chunk_sge *ch_sge_ary; | ||
293 | |||
294 | /* If no read list is present, return 0 */ | ||
295 | ch = svc_rdma_get_read_chunk(rmsgp); | ||
296 | if (!ch) | ||
297 | return 0; | ||
298 | |||
299 | /* Allocate temporary contexts to keep SGE */ | ||
300 | BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge)); | ||
301 | tmp_sge_ctxt = svc_rdma_get_context(xprt); | ||
302 | sge = tmp_sge_ctxt->sge; | ||
303 | tmp_ch_ctxt = svc_rdma_get_context(xprt); | ||
304 | ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge; | ||
305 | |||
306 | svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); | ||
307 | sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, | ||
308 | sge, ch_sge_ary, | ||
309 | ch_count, byte_count); | ||
310 | head = svc_rdma_get_context(xprt); | ||
311 | sgl_offset = 0; | ||
312 | ch_no = 0; | ||
313 | |||
314 | for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
315 | ch->rc_discrim != 0; ch++, ch_no++) { | ||
316 | next_sge: | ||
317 | if (!ctxt) | ||
318 | ctxt = head; | ||
319 | else { | ||
320 | ctxt->next = svc_rdma_get_context(xprt); | ||
321 | ctxt = ctxt->next; | ||
322 | } | ||
323 | ctxt->next = NULL; | ||
324 | ctxt->direction = DMA_FROM_DEVICE; | ||
325 | clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); | ||
326 | clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | ||
327 | if ((ch+1)->rc_discrim == 0) { | ||
328 | /* | ||
329 | * Checked in sq_cq_reap to see if we need to | ||
330 | * be enqueued | ||
331 | */ | ||
332 | set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); | ||
333 | ctxt->next = hdr_ctxt; | ||
334 | hdr_ctxt->next = head; | ||
335 | } | ||
336 | |||
337 | /* Prepare READ WR */ | ||
338 | memset(&read_wr, 0, sizeof read_wr); | ||
339 | ctxt->wr_op = IB_WR_RDMA_READ; | ||
340 | read_wr.wr_id = (unsigned long)ctxt; | ||
341 | read_wr.opcode = IB_WR_RDMA_READ; | ||
342 | read_wr.send_flags = IB_SEND_SIGNALED; | ||
343 | read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; | ||
344 | read_wr.wr.rdma.remote_addr = | ||
345 | get_unaligned(&(ch->rc_target.rs_offset)) + | ||
346 | sgl_offset; | ||
347 | read_wr.sg_list = &sge[ch_sge_ary[ch_no].start]; | ||
348 | read_wr.num_sge = | ||
349 | rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count); | ||
350 | rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start], | ||
351 | &sgl_offset, | ||
352 | read_wr.num_sge); | ||
353 | |||
354 | /* Post the read */ | ||
355 | err = svc_rdma_send(xprt, &read_wr); | ||
356 | if (err) { | ||
357 | printk(KERN_ERR "svcrdma: Error posting send = %d\n", | ||
358 | err); | ||
359 | /* | ||
360 | * Break the circular list so free knows when | ||
361 | * to stop if the error happened to occur on | ||
362 | * the last read | ||
363 | */ | ||
364 | ctxt->next = NULL; | ||
365 | goto out; | ||
366 | } | ||
367 | atomic_inc(&rdma_stat_read); | ||
368 | |||
369 | if (read_wr.num_sge < ch_sge_ary[ch_no].count) { | ||
370 | ch_sge_ary[ch_no].count -= read_wr.num_sge; | ||
371 | ch_sge_ary[ch_no].start += read_wr.num_sge; | ||
372 | goto next_sge; | ||
373 | } | ||
374 | sgl_offset = 0; | ||
375 | err = 0; | ||
376 | } | ||
377 | |||
378 | out: | ||
379 | svc_rdma_put_context(tmp_sge_ctxt, 0); | ||
380 | svc_rdma_put_context(tmp_ch_ctxt, 0); | ||
381 | |||
382 | /* Detach arg pages. svc_recv will replenish them */ | ||
383 | for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++) | ||
384 | rqstp->rq_pages[ch_no] = NULL; | ||
385 | |||
386 | /* | ||
387 | * Detach res pages. svc_release must see a resused count of | ||
388 | * zero or it will attempt to put them. | ||
389 | */ | ||
390 | while (rqstp->rq_resused) | ||
391 | rqstp->rq_respages[--rqstp->rq_resused] = NULL; | ||
392 | |||
393 | if (err) { | ||
394 | printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err); | ||
395 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
396 | /* Free the linked list of read contexts */ | ||
397 | while (head != NULL) { | ||
398 | ctxt = head->next; | ||
399 | svc_rdma_put_context(head, 1); | ||
400 | head = ctxt; | ||
401 | } | ||
402 | return 0; | ||
403 | } | ||
404 | |||
405 | return 1; | ||
406 | } | ||
407 | |||
408 | static int rdma_read_complete(struct svc_rqst *rqstp, | ||
409 | struct svc_rdma_op_ctxt *data) | ||
410 | { | ||
411 | struct svc_rdma_op_ctxt *head = data->next; | ||
412 | int page_no; | ||
413 | int ret; | ||
414 | |||
415 | BUG_ON(!head); | ||
416 | |||
417 | /* Copy RPC pages */ | ||
418 | for (page_no = 0; page_no < head->count; page_no++) { | ||
419 | put_page(rqstp->rq_pages[page_no]); | ||
420 | rqstp->rq_pages[page_no] = head->pages[page_no]; | ||
421 | } | ||
422 | /* Point rq_arg.pages past header */ | ||
423 | rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length]; | ||
424 | rqstp->rq_arg.page_len = head->arg.page_len; | ||
425 | rqstp->rq_arg.page_base = head->arg.page_base; | ||
426 | |||
427 | /* rq_respages starts after the last arg page */ | ||
428 | rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; | ||
429 | rqstp->rq_resused = 0; | ||
430 | |||
431 | /* Rebuild rq_arg head and tail. */ | ||
432 | rqstp->rq_arg.head[0] = head->arg.head[0]; | ||
433 | rqstp->rq_arg.tail[0] = head->arg.tail[0]; | ||
434 | rqstp->rq_arg.len = head->arg.len; | ||
435 | rqstp->rq_arg.buflen = head->arg.buflen; | ||
436 | |||
437 | /* XXX: What should this be? */ | ||
438 | rqstp->rq_prot = IPPROTO_MAX; | ||
439 | |||
440 | /* | ||
441 | * Free the contexts we used to build the RDMA_READ. We have | ||
442 | * to be careful here because the context list uses the same | ||
443 | * next pointer used to chain the contexts associated with the | ||
444 | * RDMA_READ | ||
445 | */ | ||
446 | data->next = NULL; /* terminate circular list */ | ||
447 | do { | ||
448 | data = head->next; | ||
449 | svc_rdma_put_context(head, 0); | ||
450 | head = data; | ||
451 | } while (head != NULL); | ||
452 | |||
453 | ret = rqstp->rq_arg.head[0].iov_len | ||
454 | + rqstp->rq_arg.page_len | ||
455 | + rqstp->rq_arg.tail[0].iov_len; | ||
456 | dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, " | ||
457 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | ||
458 | ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base, | ||
459 | rqstp->rq_arg.head[0].iov_len); | ||
460 | |||
461 | /* Indicate that we've consumed an RQ credit */ | ||
462 | rqstp->rq_xprt_ctxt = rqstp->rq_xprt; | ||
463 | svc_xprt_received(rqstp->rq_xprt); | ||
464 | return ret; | ||
465 | } | ||
466 | |||
467 | /* | ||
468 | * Set up the rqstp thread context to point to the RQ buffer. If | ||
469 | * necessary, pull additional data from the client with an RDMA_READ | ||
470 | * request. | ||
471 | */ | ||
472 | int svc_rdma_recvfrom(struct svc_rqst *rqstp) | ||
473 | { | ||
474 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
475 | struct svcxprt_rdma *rdma_xprt = | ||
476 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
477 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
478 | struct rpcrdma_msg *rmsgp; | ||
479 | int ret = 0; | ||
480 | int len; | ||
481 | |||
482 | dprintk("svcrdma: rqstp=%p\n", rqstp); | ||
483 | |||
484 | /* | ||
485 | * The rq_xprt_ctxt indicates if we've consumed an RQ credit | ||
486 | * or not. It is used in the rdma xpo_release_rqst function to | ||
487 | * determine whether or not to return an RQ WQE to the RQ. | ||
488 | */ | ||
489 | rqstp->rq_xprt_ctxt = NULL; | ||
490 | |||
491 | spin_lock_bh(&rdma_xprt->sc_read_complete_lock); | ||
492 | if (!list_empty(&rdma_xprt->sc_read_complete_q)) { | ||
493 | ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, | ||
494 | struct svc_rdma_op_ctxt, | ||
495 | dto_q); | ||
496 | list_del_init(&ctxt->dto_q); | ||
497 | } | ||
498 | spin_unlock_bh(&rdma_xprt->sc_read_complete_lock); | ||
499 | if (ctxt) | ||
500 | return rdma_read_complete(rqstp, ctxt); | ||
501 | |||
502 | spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); | ||
503 | if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { | ||
504 | ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, | ||
505 | struct svc_rdma_op_ctxt, | ||
506 | dto_q); | ||
507 | list_del_init(&ctxt->dto_q); | ||
508 | } else { | ||
509 | atomic_inc(&rdma_stat_rq_starve); | ||
510 | clear_bit(XPT_DATA, &xprt->xpt_flags); | ||
511 | ctxt = NULL; | ||
512 | } | ||
513 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); | ||
514 | if (!ctxt) { | ||
515 | /* This is the EAGAIN path. The svc_recv routine will | ||
516 | * return -EAGAIN, the nfsd thread will go to call into | ||
517 | * svc_recv again and we shouldn't be on the active | ||
518 | * transport list | ||
519 | */ | ||
520 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | ||
521 | goto close_out; | ||
522 | |||
523 | BUG_ON(ret); | ||
524 | goto out; | ||
525 | } | ||
526 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", | ||
527 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); | ||
528 | BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); | ||
529 | atomic_inc(&rdma_stat_recv); | ||
530 | |||
531 | /* Build up the XDR from the receive buffers. */ | ||
532 | rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); | ||
533 | |||
534 | /* Decode the RDMA header. */ | ||
535 | len = svc_rdma_xdr_decode_req(&rmsgp, rqstp); | ||
536 | rqstp->rq_xprt_hlen = len; | ||
537 | |||
538 | /* If the request is invalid, reply with an error */ | ||
539 | if (len < 0) { | ||
540 | if (len == -ENOSYS) | ||
541 | (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); | ||
542 | goto close_out; | ||
543 | } | ||
544 | |||
545 | /* Read read-list data. If we would need to wait, defer | ||
546 | * it. Not that in this case, we don't return the RQ credit | ||
547 | * until after the read completes. | ||
548 | */ | ||
549 | if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) { | ||
550 | svc_xprt_received(xprt); | ||
551 | return 0; | ||
552 | } | ||
553 | |||
554 | /* Indicate we've consumed an RQ credit */ | ||
555 | rqstp->rq_xprt_ctxt = rqstp->rq_xprt; | ||
556 | |||
557 | ret = rqstp->rq_arg.head[0].iov_len | ||
558 | + rqstp->rq_arg.page_len | ||
559 | + rqstp->rq_arg.tail[0].iov_len; | ||
560 | svc_rdma_put_context(ctxt, 0); | ||
561 | out: | ||
562 | dprintk("svcrdma: ret = %d, rq_arg.len =%d, " | ||
563 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | ||
564 | ret, rqstp->rq_arg.len, | ||
565 | rqstp->rq_arg.head[0].iov_base, | ||
566 | rqstp->rq_arg.head[0].iov_len); | ||
567 | rqstp->rq_prot = IPPROTO_MAX; | ||
568 | svc_xprt_copy_addrs(rqstp, xprt); | ||
569 | svc_xprt_received(xprt); | ||
570 | return ret; | ||
571 | |||
572 | close_out: | ||
573 | if (ctxt) { | ||
574 | svc_rdma_put_context(ctxt, 1); | ||
575 | /* Indicate we've consumed an RQ credit */ | ||
576 | rqstp->rq_xprt_ctxt = rqstp->rq_xprt; | ||
577 | } | ||
578 | dprintk("svcrdma: transport %p is closing\n", xprt); | ||
579 | /* | ||
580 | * Set the close bit and enqueue it. svc_recv will see the | ||
581 | * close bit and call svc_xprt_delete | ||
582 | */ | ||
583 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
584 | svc_xprt_received(xprt); | ||
585 | return 0; | ||
586 | } | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c new file mode 100644 index 000000000000..3e321949e1dc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
@@ -0,0 +1,520 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/debug.h> | ||
43 | #include <linux/sunrpc/rpc_rdma.h> | ||
44 | #include <linux/spinlock.h> | ||
45 | #include <asm/unaligned.h> | ||
46 | #include <rdma/ib_verbs.h> | ||
47 | #include <rdma/rdma_cm.h> | ||
48 | #include <linux/sunrpc/svc_rdma.h> | ||
49 | |||
50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
51 | |||
52 | /* Encode an XDR as an array of IB SGE | ||
53 | * | ||
54 | * Assumptions: | ||
55 | * - head[0] is physically contiguous. | ||
56 | * - tail[0] is physically contiguous. | ||
57 | * - pages[] is not physically or virtually contigous and consists of | ||
58 | * PAGE_SIZE elements. | ||
59 | * | ||
60 | * Output: | ||
61 | * SGE[0] reserved for RCPRDMA header | ||
62 | * SGE[1] data from xdr->head[] | ||
63 | * SGE[2..sge_count-2] data from xdr->pages[] | ||
64 | * SGE[sge_count-1] data from xdr->tail. | ||
65 | * | ||
66 | */ | ||
67 | static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, | ||
68 | struct xdr_buf *xdr, | ||
69 | struct ib_sge *sge, | ||
70 | int *sge_count) | ||
71 | { | ||
72 | /* Max we need is the length of the XDR / pagesize + one for | ||
73 | * head + one for tail + one for RPCRDMA header | ||
74 | */ | ||
75 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; | ||
76 | int sge_no; | ||
77 | u32 byte_count = xdr->len; | ||
78 | u32 sge_bytes; | ||
79 | u32 page_bytes; | ||
80 | int page_off; | ||
81 | int page_no; | ||
82 | |||
83 | /* Skip the first sge, this is for the RPCRDMA header */ | ||
84 | sge_no = 1; | ||
85 | |||
86 | /* Head SGE */ | ||
87 | sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, | ||
88 | xdr->head[0].iov_base, | ||
89 | xdr->head[0].iov_len, | ||
90 | DMA_TO_DEVICE); | ||
91 | sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len); | ||
92 | byte_count -= sge_bytes; | ||
93 | sge[sge_no].length = sge_bytes; | ||
94 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
95 | sge_no++; | ||
96 | |||
97 | /* pages SGE */ | ||
98 | page_no = 0; | ||
99 | page_bytes = xdr->page_len; | ||
100 | page_off = xdr->page_base; | ||
101 | while (byte_count && page_bytes) { | ||
102 | sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); | ||
103 | sge[sge_no].addr = | ||
104 | ib_dma_map_page(xprt->sc_cm_id->device, | ||
105 | xdr->pages[page_no], page_off, | ||
106 | sge_bytes, DMA_TO_DEVICE); | ||
107 | sge_bytes = min(sge_bytes, page_bytes); | ||
108 | byte_count -= sge_bytes; | ||
109 | page_bytes -= sge_bytes; | ||
110 | sge[sge_no].length = sge_bytes; | ||
111 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
112 | |||
113 | sge_no++; | ||
114 | page_no++; | ||
115 | page_off = 0; /* reset for next time through loop */ | ||
116 | } | ||
117 | |||
118 | /* Tail SGE */ | ||
119 | if (byte_count && xdr->tail[0].iov_len) { | ||
120 | sge[sge_no].addr = | ||
121 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
122 | xdr->tail[0].iov_base, | ||
123 | xdr->tail[0].iov_len, | ||
124 | DMA_TO_DEVICE); | ||
125 | sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len); | ||
126 | byte_count -= sge_bytes; | ||
127 | sge[sge_no].length = sge_bytes; | ||
128 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
129 | sge_no++; | ||
130 | } | ||
131 | |||
132 | BUG_ON(sge_no > sge_max); | ||
133 | BUG_ON(byte_count != 0); | ||
134 | |||
135 | *sge_count = sge_no; | ||
136 | return sge; | ||
137 | } | ||
138 | |||
139 | |||
140 | /* Assumptions: | ||
141 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE | ||
142 | */ | ||
143 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | ||
144 | u32 rmr, u64 to, | ||
145 | u32 xdr_off, int write_len, | ||
146 | struct ib_sge *xdr_sge, int sge_count) | ||
147 | { | ||
148 | struct svc_rdma_op_ctxt *tmp_sge_ctxt; | ||
149 | struct ib_send_wr write_wr; | ||
150 | struct ib_sge *sge; | ||
151 | int xdr_sge_no; | ||
152 | int sge_no; | ||
153 | int sge_bytes; | ||
154 | int sge_off; | ||
155 | int bc; | ||
156 | struct svc_rdma_op_ctxt *ctxt; | ||
157 | int ret = 0; | ||
158 | |||
159 | BUG_ON(sge_count >= 32); | ||
160 | dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " | ||
161 | "write_len=%d, xdr_sge=%p, sge_count=%d\n", | ||
162 | rmr, to, xdr_off, write_len, xdr_sge, sge_count); | ||
163 | |||
164 | ctxt = svc_rdma_get_context(xprt); | ||
165 | ctxt->count = 0; | ||
166 | tmp_sge_ctxt = svc_rdma_get_context(xprt); | ||
167 | sge = tmp_sge_ctxt->sge; | ||
168 | |||
169 | /* Find the SGE associated with xdr_off */ | ||
170 | for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; | ||
171 | xdr_sge_no++) { | ||
172 | if (xdr_sge[xdr_sge_no].length > bc) | ||
173 | break; | ||
174 | bc -= xdr_sge[xdr_sge_no].length; | ||
175 | } | ||
176 | |||
177 | sge_off = bc; | ||
178 | bc = write_len; | ||
179 | sge_no = 0; | ||
180 | |||
181 | /* Copy the remaining SGE */ | ||
182 | while (bc != 0 && xdr_sge_no < sge_count) { | ||
183 | sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; | ||
184 | sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey; | ||
185 | sge_bytes = min((size_t)bc, | ||
186 | (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); | ||
187 | sge[sge_no].length = sge_bytes; | ||
188 | |||
189 | sge_off = 0; | ||
190 | sge_no++; | ||
191 | xdr_sge_no++; | ||
192 | bc -= sge_bytes; | ||
193 | } | ||
194 | |||
195 | BUG_ON(bc != 0); | ||
196 | BUG_ON(xdr_sge_no > sge_count); | ||
197 | |||
198 | /* Prepare WRITE WR */ | ||
199 | memset(&write_wr, 0, sizeof write_wr); | ||
200 | ctxt->wr_op = IB_WR_RDMA_WRITE; | ||
201 | write_wr.wr_id = (unsigned long)ctxt; | ||
202 | write_wr.sg_list = &sge[0]; | ||
203 | write_wr.num_sge = sge_no; | ||
204 | write_wr.opcode = IB_WR_RDMA_WRITE; | ||
205 | write_wr.send_flags = IB_SEND_SIGNALED; | ||
206 | write_wr.wr.rdma.rkey = rmr; | ||
207 | write_wr.wr.rdma.remote_addr = to; | ||
208 | |||
209 | /* Post It */ | ||
210 | atomic_inc(&rdma_stat_write); | ||
211 | if (svc_rdma_send(xprt, &write_wr)) { | ||
212 | svc_rdma_put_context(ctxt, 1); | ||
213 | /* Fatal error, close transport */ | ||
214 | ret = -EIO; | ||
215 | } | ||
216 | svc_rdma_put_context(tmp_sge_ctxt, 0); | ||
217 | return ret; | ||
218 | } | ||
219 | |||
220 | static int send_write_chunks(struct svcxprt_rdma *xprt, | ||
221 | struct rpcrdma_msg *rdma_argp, | ||
222 | struct rpcrdma_msg *rdma_resp, | ||
223 | struct svc_rqst *rqstp, | ||
224 | struct ib_sge *sge, | ||
225 | int sge_count) | ||
226 | { | ||
227 | u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; | ||
228 | int write_len; | ||
229 | int max_write; | ||
230 | u32 xdr_off; | ||
231 | int chunk_off; | ||
232 | int chunk_no; | ||
233 | struct rpcrdma_write_array *arg_ary; | ||
234 | struct rpcrdma_write_array *res_ary; | ||
235 | int ret; | ||
236 | |||
237 | arg_ary = svc_rdma_get_write_array(rdma_argp); | ||
238 | if (!arg_ary) | ||
239 | return 0; | ||
240 | res_ary = (struct rpcrdma_write_array *) | ||
241 | &rdma_resp->rm_body.rm_chunks[1]; | ||
242 | |||
243 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
244 | |||
245 | /* Write chunks start at the pagelist */ | ||
246 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; | ||
247 | xfer_len && chunk_no < arg_ary->wc_nchunks; | ||
248 | chunk_no++) { | ||
249 | struct rpcrdma_segment *arg_ch; | ||
250 | u64 rs_offset; | ||
251 | |||
252 | arg_ch = &arg_ary->wc_array[chunk_no].wc_target; | ||
253 | write_len = min(xfer_len, arg_ch->rs_length); | ||
254 | |||
255 | /* Prepare the response chunk given the length actually | ||
256 | * written */ | ||
257 | rs_offset = get_unaligned(&(arg_ch->rs_offset)); | ||
258 | svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, | ||
259 | arg_ch->rs_handle, | ||
260 | rs_offset, | ||
261 | write_len); | ||
262 | chunk_off = 0; | ||
263 | while (write_len) { | ||
264 | int this_write; | ||
265 | this_write = min(write_len, max_write); | ||
266 | ret = send_write(xprt, rqstp, | ||
267 | arg_ch->rs_handle, | ||
268 | rs_offset + chunk_off, | ||
269 | xdr_off, | ||
270 | this_write, | ||
271 | sge, | ||
272 | sge_count); | ||
273 | if (ret) { | ||
274 | dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", | ||
275 | ret); | ||
276 | return -EIO; | ||
277 | } | ||
278 | chunk_off += this_write; | ||
279 | xdr_off += this_write; | ||
280 | xfer_len -= this_write; | ||
281 | write_len -= this_write; | ||
282 | } | ||
283 | } | ||
284 | /* Update the req with the number of chunks actually used */ | ||
285 | svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no); | ||
286 | |||
287 | return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; | ||
288 | } | ||
289 | |||
290 | static int send_reply_chunks(struct svcxprt_rdma *xprt, | ||
291 | struct rpcrdma_msg *rdma_argp, | ||
292 | struct rpcrdma_msg *rdma_resp, | ||
293 | struct svc_rqst *rqstp, | ||
294 | struct ib_sge *sge, | ||
295 | int sge_count) | ||
296 | { | ||
297 | u32 xfer_len = rqstp->rq_res.len; | ||
298 | int write_len; | ||
299 | int max_write; | ||
300 | u32 xdr_off; | ||
301 | int chunk_no; | ||
302 | int chunk_off; | ||
303 | struct rpcrdma_segment *ch; | ||
304 | struct rpcrdma_write_array *arg_ary; | ||
305 | struct rpcrdma_write_array *res_ary; | ||
306 | int ret; | ||
307 | |||
308 | arg_ary = svc_rdma_get_reply_array(rdma_argp); | ||
309 | if (!arg_ary) | ||
310 | return 0; | ||
311 | /* XXX: need to fix when reply lists occur with read-list and or | ||
312 | * write-list */ | ||
313 | res_ary = (struct rpcrdma_write_array *) | ||
314 | &rdma_resp->rm_body.rm_chunks[2]; | ||
315 | |||
316 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
317 | |||
318 | /* xdr offset starts at RPC message */ | ||
319 | for (xdr_off = 0, chunk_no = 0; | ||
320 | xfer_len && chunk_no < arg_ary->wc_nchunks; | ||
321 | chunk_no++) { | ||
322 | u64 rs_offset; | ||
323 | ch = &arg_ary->wc_array[chunk_no].wc_target; | ||
324 | write_len = min(xfer_len, ch->rs_length); | ||
325 | |||
326 | |||
327 | /* Prepare the reply chunk given the length actually | ||
328 | * written */ | ||
329 | rs_offset = get_unaligned(&(ch->rs_offset)); | ||
330 | svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, | ||
331 | ch->rs_handle, rs_offset, | ||
332 | write_len); | ||
333 | chunk_off = 0; | ||
334 | while (write_len) { | ||
335 | int this_write; | ||
336 | |||
337 | this_write = min(write_len, max_write); | ||
338 | ret = send_write(xprt, rqstp, | ||
339 | ch->rs_handle, | ||
340 | rs_offset + chunk_off, | ||
341 | xdr_off, | ||
342 | this_write, | ||
343 | sge, | ||
344 | sge_count); | ||
345 | if (ret) { | ||
346 | dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", | ||
347 | ret); | ||
348 | return -EIO; | ||
349 | } | ||
350 | chunk_off += this_write; | ||
351 | xdr_off += this_write; | ||
352 | xfer_len -= this_write; | ||
353 | write_len -= this_write; | ||
354 | } | ||
355 | } | ||
356 | /* Update the req with the number of chunks actually used */ | ||
357 | svc_rdma_xdr_encode_reply_array(res_ary, chunk_no); | ||
358 | |||
359 | return rqstp->rq_res.len; | ||
360 | } | ||
361 | |||
362 | /* This function prepares the portion of the RPCRDMA message to be | ||
363 | * sent in the RDMA_SEND. This function is called after data sent via | ||
364 | * RDMA has already been transmitted. There are three cases: | ||
365 | * - The RPCRDMA header, RPC header, and payload are all sent in a | ||
366 | * single RDMA_SEND. This is the "inline" case. | ||
367 | * - The RPCRDMA header and some portion of the RPC header and data | ||
368 | * are sent via this RDMA_SEND and another portion of the data is | ||
369 | * sent via RDMA. | ||
370 | * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC | ||
371 | * header and data are all transmitted via RDMA. | ||
372 | * In all three cases, this function prepares the RPCRDMA header in | ||
373 | * sge[0], the 'type' parameter indicates the type to place in the | ||
374 | * RPCRDMA header, and the 'byte_count' field indicates how much of | ||
375 | * the XDR to include in this RDMA_SEND. | ||
376 | */ | ||
377 | static int send_reply(struct svcxprt_rdma *rdma, | ||
378 | struct svc_rqst *rqstp, | ||
379 | struct page *page, | ||
380 | struct rpcrdma_msg *rdma_resp, | ||
381 | struct svc_rdma_op_ctxt *ctxt, | ||
382 | int sge_count, | ||
383 | int byte_count) | ||
384 | { | ||
385 | struct ib_send_wr send_wr; | ||
386 | int sge_no; | ||
387 | int sge_bytes; | ||
388 | int page_no; | ||
389 | int ret; | ||
390 | |||
391 | /* Prepare the context */ | ||
392 | ctxt->pages[0] = page; | ||
393 | ctxt->count = 1; | ||
394 | |||
395 | /* Prepare the SGE for the RPCRDMA Header */ | ||
396 | ctxt->sge[0].addr = | ||
397 | ib_dma_map_page(rdma->sc_cm_id->device, | ||
398 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); | ||
399 | ctxt->direction = DMA_TO_DEVICE; | ||
400 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); | ||
401 | ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; | ||
402 | |||
403 | /* Determine how many of our SGE are to be transmitted */ | ||
404 | for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { | ||
405 | sge_bytes = min((size_t)ctxt->sge[sge_no].length, | ||
406 | (size_t)byte_count); | ||
407 | byte_count -= sge_bytes; | ||
408 | } | ||
409 | BUG_ON(byte_count != 0); | ||
410 | |||
411 | /* Save all respages in the ctxt and remove them from the | ||
412 | * respages array. They are our pages until the I/O | ||
413 | * completes. | ||
414 | */ | ||
415 | for (page_no = 0; page_no < rqstp->rq_resused; page_no++) { | ||
416 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; | ||
417 | ctxt->count++; | ||
418 | rqstp->rq_respages[page_no] = NULL; | ||
419 | } | ||
420 | |||
421 | BUG_ON(sge_no > rdma->sc_max_sge); | ||
422 | memset(&send_wr, 0, sizeof send_wr); | ||
423 | ctxt->wr_op = IB_WR_SEND; | ||
424 | send_wr.wr_id = (unsigned long)ctxt; | ||
425 | send_wr.sg_list = ctxt->sge; | ||
426 | send_wr.num_sge = sge_no; | ||
427 | send_wr.opcode = IB_WR_SEND; | ||
428 | send_wr.send_flags = IB_SEND_SIGNALED; | ||
429 | |||
430 | ret = svc_rdma_send(rdma, &send_wr); | ||
431 | if (ret) | ||
432 | svc_rdma_put_context(ctxt, 1); | ||
433 | |||
434 | return ret; | ||
435 | } | ||
436 | |||
/*
 * Reply-header preparation hook for the RDMA transport.  Intentionally
 * empty: this function does nothing to the request.
 */
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
	/* nothing to do */
}
440 | |||
441 | /* | ||
442 | * Return the start of an xdr buffer. | ||
443 | */ | ||
444 | static void *xdr_start(struct xdr_buf *xdr) | ||
445 | { | ||
446 | return xdr->head[0].iov_base - | ||
447 | (xdr->len - | ||
448 | xdr->page_len - | ||
449 | xdr->tail[0].iov_len - | ||
450 | xdr->head[0].iov_len); | ||
451 | } | ||
452 | |||
453 | int svc_rdma_sendto(struct svc_rqst *rqstp) | ||
454 | { | ||
455 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
456 | struct svcxprt_rdma *rdma = | ||
457 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
458 | struct rpcrdma_msg *rdma_argp; | ||
459 | struct rpcrdma_msg *rdma_resp; | ||
460 | struct rpcrdma_write_array *reply_ary; | ||
461 | enum rpcrdma_proc reply_type; | ||
462 | int ret; | ||
463 | int inline_bytes; | ||
464 | struct ib_sge *sge; | ||
465 | int sge_count = 0; | ||
466 | struct page *res_page; | ||
467 | struct svc_rdma_op_ctxt *ctxt; | ||
468 | |||
469 | dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); | ||
470 | |||
471 | /* Get the RDMA request header. */ | ||
472 | rdma_argp = xdr_start(&rqstp->rq_arg); | ||
473 | |||
474 | /* Build an SGE for the XDR */ | ||
475 | ctxt = svc_rdma_get_context(rdma); | ||
476 | ctxt->direction = DMA_TO_DEVICE; | ||
477 | sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); | ||
478 | |||
479 | inline_bytes = rqstp->rq_res.len; | ||
480 | |||
481 | /* Create the RDMA response header */ | ||
482 | res_page = svc_rdma_get_page(); | ||
483 | rdma_resp = page_address(res_page); | ||
484 | reply_ary = svc_rdma_get_reply_array(rdma_argp); | ||
485 | if (reply_ary) | ||
486 | reply_type = RDMA_NOMSG; | ||
487 | else | ||
488 | reply_type = RDMA_MSG; | ||
489 | svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, | ||
490 | rdma_resp, reply_type); | ||
491 | |||
492 | /* Send any write-chunk data and build resp write-list */ | ||
493 | ret = send_write_chunks(rdma, rdma_argp, rdma_resp, | ||
494 | rqstp, sge, sge_count); | ||
495 | if (ret < 0) { | ||
496 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", | ||
497 | ret); | ||
498 | goto error; | ||
499 | } | ||
500 | inline_bytes -= ret; | ||
501 | |||
502 | /* Send any reply-list data and update resp reply-list */ | ||
503 | ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, | ||
504 | rqstp, sge, sge_count); | ||
505 | if (ret < 0) { | ||
506 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", | ||
507 | ret); | ||
508 | goto error; | ||
509 | } | ||
510 | inline_bytes -= ret; | ||
511 | |||
512 | ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, | ||
513 | inline_bytes); | ||
514 | dprintk("svcrdma: send_reply returns %d\n", ret); | ||
515 | return ret; | ||
516 | error: | ||
517 | svc_rdma_put_context(ctxt, 0); | ||
518 | put_page(res_page); | ||
519 | return ret; | ||
520 | } | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c new file mode 100644 index 000000000000..f09444c451bc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
@@ -0,0 +1,1080 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
8 | * license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or without | ||
11 | * modification, are permitted provided that the following conditions | ||
12 | * are met: | ||
13 | * | ||
14 | * Redistributions of source code must retain the above copyright | ||
15 | * notice, this list of conditions and the following disclaimer. | ||
16 | * | ||
17 | * Redistributions in binary form must reproduce the above | ||
18 | * copyright notice, this list of conditions and the following | ||
19 | * disclaimer in the documentation and/or other materials provided | ||
20 | * with the distribution. | ||
21 | * | ||
22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
23 | * its contributors may be used to endorse or promote products | ||
24 | * derived from this software without specific prior written | ||
25 | * permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | * | ||
39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
40 | */ | ||
41 | |||
42 | #include <linux/sunrpc/svc_xprt.h> | ||
43 | #include <linux/sunrpc/debug.h> | ||
44 | #include <linux/sunrpc/rpc_rdma.h> | ||
45 | #include <linux/spinlock.h> | ||
46 | #include <rdma/ib_verbs.h> | ||
47 | #include <rdma/rdma_cm.h> | ||
48 | #include <linux/sunrpc/svc_rdma.h> | ||
49 | |||
50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
51 | |||
52 | static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | ||
53 | struct sockaddr *sa, int salen, | ||
54 | int flags); | ||
55 | static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); | ||
56 | static void svc_rdma_release_rqst(struct svc_rqst *); | ||
57 | static void rdma_destroy_xprt(struct svcxprt_rdma *xprt); | ||
58 | static void dto_tasklet_func(unsigned long data); | ||
59 | static void svc_rdma_detach(struct svc_xprt *xprt); | ||
60 | static void svc_rdma_free(struct svc_xprt *xprt); | ||
61 | static int svc_rdma_has_wspace(struct svc_xprt *xprt); | ||
62 | static void rq_cq_reap(struct svcxprt_rdma *xprt); | ||
63 | static void sq_cq_reap(struct svcxprt_rdma *xprt); | ||
64 | |||
65 | DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL); | ||
66 | static DEFINE_SPINLOCK(dto_lock); | ||
67 | static LIST_HEAD(dto_xprt_q); | ||
68 | |||
69 | static struct svc_xprt_ops svc_rdma_ops = { | ||
70 | .xpo_create = svc_rdma_create, | ||
71 | .xpo_recvfrom = svc_rdma_recvfrom, | ||
72 | .xpo_sendto = svc_rdma_sendto, | ||
73 | .xpo_release_rqst = svc_rdma_release_rqst, | ||
74 | .xpo_detach = svc_rdma_detach, | ||
75 | .xpo_free = svc_rdma_free, | ||
76 | .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, | ||
77 | .xpo_has_wspace = svc_rdma_has_wspace, | ||
78 | .xpo_accept = svc_rdma_accept, | ||
79 | }; | ||
80 | |||
81 | struct svc_xprt_class svc_rdma_class = { | ||
82 | .xcl_name = "rdma", | ||
83 | .xcl_owner = THIS_MODULE, | ||
84 | .xcl_ops = &svc_rdma_ops, | ||
85 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, | ||
86 | }; | ||
87 | |||
88 | static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) | ||
89 | { | ||
90 | int target; | ||
91 | int at_least_one = 0; | ||
92 | struct svc_rdma_op_ctxt *ctxt; | ||
93 | |||
94 | target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump, | ||
95 | xprt->sc_ctxt_max); | ||
96 | |||
97 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
98 | while (xprt->sc_ctxt_cnt < target) { | ||
99 | xprt->sc_ctxt_cnt++; | ||
100 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
101 | |||
102 | ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); | ||
103 | |||
104 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
105 | if (ctxt) { | ||
106 | at_least_one = 1; | ||
107 | ctxt->next = xprt->sc_ctxt_head; | ||
108 | xprt->sc_ctxt_head = ctxt; | ||
109 | } else { | ||
110 | /* kmalloc failed...give up for now */ | ||
111 | xprt->sc_ctxt_cnt--; | ||
112 | break; | ||
113 | } | ||
114 | } | ||
115 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
116 | dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n", | ||
117 | xprt->sc_ctxt_max, xprt->sc_ctxt_cnt); | ||
118 | return at_least_one; | ||
119 | } | ||
120 | |||
121 | struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | ||
122 | { | ||
123 | struct svc_rdma_op_ctxt *ctxt; | ||
124 | |||
125 | while (1) { | ||
126 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
127 | if (unlikely(xprt->sc_ctxt_head == NULL)) { | ||
128 | /* Try to bump my cache. */ | ||
129 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
130 | |||
131 | if (rdma_bump_context_cache(xprt)) | ||
132 | continue; | ||
133 | |||
134 | printk(KERN_INFO "svcrdma: sleeping waiting for " | ||
135 | "context memory on xprt=%p\n", | ||
136 | xprt); | ||
137 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | ||
138 | continue; | ||
139 | } | ||
140 | ctxt = xprt->sc_ctxt_head; | ||
141 | xprt->sc_ctxt_head = ctxt->next; | ||
142 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
143 | ctxt->xprt = xprt; | ||
144 | INIT_LIST_HEAD(&ctxt->dto_q); | ||
145 | ctxt->count = 0; | ||
146 | break; | ||
147 | } | ||
148 | return ctxt; | ||
149 | } | ||
150 | |||
151 | void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) | ||
152 | { | ||
153 | struct svcxprt_rdma *xprt; | ||
154 | int i; | ||
155 | |||
156 | BUG_ON(!ctxt); | ||
157 | xprt = ctxt->xprt; | ||
158 | if (free_pages) | ||
159 | for (i = 0; i < ctxt->count; i++) | ||
160 | put_page(ctxt->pages[i]); | ||
161 | |||
162 | for (i = 0; i < ctxt->count; i++) | ||
163 | dma_unmap_single(xprt->sc_cm_id->device->dma_device, | ||
164 | ctxt->sge[i].addr, | ||
165 | ctxt->sge[i].length, | ||
166 | ctxt->direction); | ||
167 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
168 | ctxt->next = xprt->sc_ctxt_head; | ||
169 | xprt->sc_ctxt_head = ctxt; | ||
170 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
171 | } | ||
172 | |||
173 | /* ib_cq event handler */ | ||
174 | static void cq_event_handler(struct ib_event *event, void *context) | ||
175 | { | ||
176 | struct svc_xprt *xprt = context; | ||
177 | dprintk("svcrdma: received CQ event id=%d, context=%p\n", | ||
178 | event->event, context); | ||
179 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
180 | } | ||
181 | |||
182 | /* QP event handler */ | ||
183 | static void qp_event_handler(struct ib_event *event, void *context) | ||
184 | { | ||
185 | struct svc_xprt *xprt = context; | ||
186 | |||
187 | switch (event->event) { | ||
188 | /* These are considered benign events */ | ||
189 | case IB_EVENT_PATH_MIG: | ||
190 | case IB_EVENT_COMM_EST: | ||
191 | case IB_EVENT_SQ_DRAINED: | ||
192 | case IB_EVENT_QP_LAST_WQE_REACHED: | ||
193 | dprintk("svcrdma: QP event %d received for QP=%p\n", | ||
194 | event->event, event->element.qp); | ||
195 | break; | ||
196 | /* These are considered fatal events */ | ||
197 | case IB_EVENT_PATH_MIG_ERR: | ||
198 | case IB_EVENT_QP_FATAL: | ||
199 | case IB_EVENT_QP_REQ_ERR: | ||
200 | case IB_EVENT_QP_ACCESS_ERR: | ||
201 | case IB_EVENT_DEVICE_FATAL: | ||
202 | default: | ||
203 | dprintk("svcrdma: QP ERROR event %d received for QP=%p, " | ||
204 | "closing transport\n", | ||
205 | event->event, event->element.qp); | ||
206 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
207 | break; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * Data Transfer Operation Tasklet | ||
213 | * | ||
214 | * Walks a list of transports with I/O pending, removing entries as | ||
215 | * they are added to the server's I/O pending list. Two bits indicate | ||
216 | * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave | ||
217 | * spinlock that serializes access to the transport list with the RQ | ||
218 | * and SQ interrupt handlers. | ||
219 | */ | ||
220 | static void dto_tasklet_func(unsigned long data) | ||
221 | { | ||
222 | struct svcxprt_rdma *xprt; | ||
223 | unsigned long flags; | ||
224 | |||
225 | spin_lock_irqsave(&dto_lock, flags); | ||
226 | while (!list_empty(&dto_xprt_q)) { | ||
227 | xprt = list_entry(dto_xprt_q.next, | ||
228 | struct svcxprt_rdma, sc_dto_q); | ||
229 | list_del_init(&xprt->sc_dto_q); | ||
230 | spin_unlock_irqrestore(&dto_lock, flags); | ||
231 | |||
232 | if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { | ||
233 | ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); | ||
234 | rq_cq_reap(xprt); | ||
235 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
236 | /* | ||
237 | * If data arrived before established event, | ||
238 | * don't enqueue. This defers RPC I/O until the | ||
239 | * RDMA connection is complete. | ||
240 | */ | ||
241 | if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) | ||
242 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
243 | } | ||
244 | |||
245 | if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) { | ||
246 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); | ||
247 | sq_cq_reap(xprt); | ||
248 | } | ||
249 | |||
250 | spin_lock_irqsave(&dto_lock, flags); | ||
251 | } | ||
252 | spin_unlock_irqrestore(&dto_lock, flags); | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * Receive Queue Completion Handler | ||
257 | * | ||
258 | * Since an RQ completion handler is called on interrupt context, we | ||
259 | * need to defer the handling of the I/O to a tasklet | ||
260 | */ | ||
261 | static void rq_comp_handler(struct ib_cq *cq, void *cq_context) | ||
262 | { | ||
263 | struct svcxprt_rdma *xprt = cq_context; | ||
264 | unsigned long flags; | ||
265 | |||
266 | /* | ||
267 | * Set the bit regardless of whether or not it's on the list | ||
268 | * because it may be on the list already due to an SQ | ||
269 | * completion. | ||
270 | */ | ||
271 | set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); | ||
272 | |||
273 | /* | ||
274 | * If this transport is not already on the DTO transport queue, | ||
275 | * add it | ||
276 | */ | ||
277 | spin_lock_irqsave(&dto_lock, flags); | ||
278 | if (list_empty(&xprt->sc_dto_q)) | ||
279 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); | ||
280 | spin_unlock_irqrestore(&dto_lock, flags); | ||
281 | |||
282 | /* Tasklet does all the work to avoid irqsave locks. */ | ||
283 | tasklet_schedule(&dto_tasklet); | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * rq_cq_reap - Process the RQ CQ. | ||
288 | * | ||
289 | * Take all completing WC off the CQE and enqueue the associated DTO | ||
290 | * context on the dto_q for the transport. | ||
291 | */ | ||
292 | static void rq_cq_reap(struct svcxprt_rdma *xprt) | ||
293 | { | ||
294 | int ret; | ||
295 | struct ib_wc wc; | ||
296 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
297 | |||
298 | atomic_inc(&rdma_stat_rq_poll); | ||
299 | |||
300 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
301 | while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { | ||
302 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
303 | ctxt->wc_status = wc.status; | ||
304 | ctxt->byte_len = wc.byte_len; | ||
305 | if (wc.status != IB_WC_SUCCESS) { | ||
306 | /* Close the transport */ | ||
307 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
308 | svc_rdma_put_context(ctxt, 1); | ||
309 | continue; | ||
310 | } | ||
311 | list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); | ||
312 | } | ||
313 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
314 | |||
315 | if (ctxt) | ||
316 | atomic_inc(&rdma_stat_rq_prod); | ||
317 | } | ||
318 | |||
319 | /* | ||
320 | * Send Queue Completion Handler - potentially called on interrupt context. | ||
321 | */ | ||
322 | static void sq_cq_reap(struct svcxprt_rdma *xprt) | ||
323 | { | ||
324 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
325 | struct ib_wc wc; | ||
326 | struct ib_cq *cq = xprt->sc_sq_cq; | ||
327 | int ret; | ||
328 | |||
329 | atomic_inc(&rdma_stat_sq_poll); | ||
330 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { | ||
331 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
332 | xprt = ctxt->xprt; | ||
333 | |||
334 | if (wc.status != IB_WC_SUCCESS) | ||
335 | /* Close the transport */ | ||
336 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
337 | |||
338 | /* Decrement used SQ WR count */ | ||
339 | atomic_dec(&xprt->sc_sq_count); | ||
340 | wake_up(&xprt->sc_send_wait); | ||
341 | |||
342 | switch (ctxt->wr_op) { | ||
343 | case IB_WR_SEND: | ||
344 | case IB_WR_RDMA_WRITE: | ||
345 | svc_rdma_put_context(ctxt, 1); | ||
346 | break; | ||
347 | |||
348 | case IB_WR_RDMA_READ: | ||
349 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
350 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
351 | set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); | ||
352 | spin_lock_bh(&xprt->sc_read_complete_lock); | ||
353 | list_add_tail(&ctxt->dto_q, | ||
354 | &xprt->sc_read_complete_q); | ||
355 | spin_unlock_bh(&xprt->sc_read_complete_lock); | ||
356 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
357 | } | ||
358 | break; | ||
359 | |||
360 | default: | ||
361 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
362 | "opcode=%d, status=%d\n", | ||
363 | wc.opcode, wc.status); | ||
364 | break; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | if (ctxt) | ||
369 | atomic_inc(&rdma_stat_sq_prod); | ||
370 | } | ||
371 | |||
372 | static void sq_comp_handler(struct ib_cq *cq, void *cq_context) | ||
373 | { | ||
374 | struct svcxprt_rdma *xprt = cq_context; | ||
375 | unsigned long flags; | ||
376 | |||
377 | /* | ||
378 | * Set the bit regardless of whether or not it's on the list | ||
379 | * because it may be on the list already due to an RQ | ||
380 | * completion. | ||
381 | */ | ||
382 | set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags); | ||
383 | |||
384 | /* | ||
385 | * If this transport is not already on the DTO transport queue, | ||
386 | * add it | ||
387 | */ | ||
388 | spin_lock_irqsave(&dto_lock, flags); | ||
389 | if (list_empty(&xprt->sc_dto_q)) | ||
390 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); | ||
391 | spin_unlock_irqrestore(&dto_lock, flags); | ||
392 | |||
393 | /* Tasklet does all the work to avoid irqsave locks. */ | ||
394 | tasklet_schedule(&dto_tasklet); | ||
395 | } | ||
396 | |||
397 | static void create_context_cache(struct svcxprt_rdma *xprt, | ||
398 | int ctxt_count, int ctxt_bump, int ctxt_max) | ||
399 | { | ||
400 | struct svc_rdma_op_ctxt *ctxt; | ||
401 | int i; | ||
402 | |||
403 | xprt->sc_ctxt_max = ctxt_max; | ||
404 | xprt->sc_ctxt_bump = ctxt_bump; | ||
405 | xprt->sc_ctxt_cnt = 0; | ||
406 | xprt->sc_ctxt_head = NULL; | ||
407 | for (i = 0; i < ctxt_count; i++) { | ||
408 | ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); | ||
409 | if (ctxt) { | ||
410 | ctxt->next = xprt->sc_ctxt_head; | ||
411 | xprt->sc_ctxt_head = ctxt; | ||
412 | xprt->sc_ctxt_cnt++; | ||
413 | } | ||
414 | } | ||
415 | } | ||
416 | |||
417 | static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt) | ||
418 | { | ||
419 | struct svc_rdma_op_ctxt *next; | ||
420 | if (!ctxt) | ||
421 | return; | ||
422 | |||
423 | do { | ||
424 | next = ctxt->next; | ||
425 | kfree(ctxt); | ||
426 | ctxt = next; | ||
427 | } while (next); | ||
428 | } | ||
429 | |||
430 | static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, | ||
431 | int listener) | ||
432 | { | ||
433 | struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL); | ||
434 | |||
435 | if (!cma_xprt) | ||
436 | return NULL; | ||
437 | svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv); | ||
438 | INIT_LIST_HEAD(&cma_xprt->sc_accept_q); | ||
439 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); | ||
440 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); | ||
441 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); | ||
442 | init_waitqueue_head(&cma_xprt->sc_send_wait); | ||
443 | |||
444 | spin_lock_init(&cma_xprt->sc_lock); | ||
445 | spin_lock_init(&cma_xprt->sc_read_complete_lock); | ||
446 | spin_lock_init(&cma_xprt->sc_ctxt_lock); | ||
447 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); | ||
448 | |||
449 | cma_xprt->sc_ord = svcrdma_ord; | ||
450 | |||
451 | cma_xprt->sc_max_req_size = svcrdma_max_req_size; | ||
452 | cma_xprt->sc_max_requests = svcrdma_max_requests; | ||
453 | cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT; | ||
454 | atomic_set(&cma_xprt->sc_sq_count, 0); | ||
455 | |||
456 | if (!listener) { | ||
457 | int reqs = cma_xprt->sc_max_requests; | ||
458 | create_context_cache(cma_xprt, | ||
459 | reqs << 1, /* starting size */ | ||
460 | reqs, /* bump amount */ | ||
461 | reqs + | ||
462 | cma_xprt->sc_sq_depth + | ||
463 | RPCRDMA_MAX_THREADS + 1); /* max */ | ||
464 | if (!cma_xprt->sc_ctxt_head) { | ||
465 | kfree(cma_xprt); | ||
466 | return NULL; | ||
467 | } | ||
468 | clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); | ||
469 | } else | ||
470 | set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); | ||
471 | |||
472 | return cma_xprt; | ||
473 | } | ||
474 | |||
475 | struct page *svc_rdma_get_page(void) | ||
476 | { | ||
477 | struct page *page; | ||
478 | |||
479 | while ((page = alloc_page(GFP_KERNEL)) == NULL) { | ||
480 | /* If we can't get memory, wait a bit and try again */ | ||
481 | printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 " | ||
482 | "jiffies.\n"); | ||
483 | schedule_timeout_uninterruptible(msecs_to_jiffies(1000)); | ||
484 | } | ||
485 | return page; | ||
486 | } | ||
487 | |||
488 | int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | ||
489 | { | ||
490 | struct ib_recv_wr recv_wr, *bad_recv_wr; | ||
491 | struct svc_rdma_op_ctxt *ctxt; | ||
492 | struct page *page; | ||
493 | unsigned long pa; | ||
494 | int sge_no; | ||
495 | int buflen; | ||
496 | int ret; | ||
497 | |||
498 | ctxt = svc_rdma_get_context(xprt); | ||
499 | buflen = 0; | ||
500 | ctxt->direction = DMA_FROM_DEVICE; | ||
501 | for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { | ||
502 | BUG_ON(sge_no >= xprt->sc_max_sge); | ||
503 | page = svc_rdma_get_page(); | ||
504 | ctxt->pages[sge_no] = page; | ||
505 | pa = ib_dma_map_page(xprt->sc_cm_id->device, | ||
506 | page, 0, PAGE_SIZE, | ||
507 | DMA_FROM_DEVICE); | ||
508 | ctxt->sge[sge_no].addr = pa; | ||
509 | ctxt->sge[sge_no].length = PAGE_SIZE; | ||
510 | ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
511 | buflen += PAGE_SIZE; | ||
512 | } | ||
513 | ctxt->count = sge_no; | ||
514 | recv_wr.next = NULL; | ||
515 | recv_wr.sg_list = &ctxt->sge[0]; | ||
516 | recv_wr.num_sge = ctxt->count; | ||
517 | recv_wr.wr_id = (u64)(unsigned long)ctxt; | ||
518 | |||
519 | ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); | ||
520 | return ret; | ||
521 | } | ||
522 | |||
523 | /* | ||
524 | * This function handles the CONNECT_REQUEST event on a listening | ||
525 | * endpoint. It is passed the cma_id for the _new_ connection. The context in | ||
526 | * this cma_id is inherited from the listening cma_id and is the svc_xprt | ||
527 | * structure for the listening endpoint. | ||
528 | * | ||
529 | * This function creates a new xprt for the new connection and enqueues it on | ||
530 | * the accept queue for the listent xprt. When the listen thread is kicked, it | ||
531 | * will call the recvfrom method on the listen xprt which will accept the new | ||
532 | * connection. | ||
533 | */ | ||
534 | static void handle_connect_req(struct rdma_cm_id *new_cma_id) | ||
535 | { | ||
536 | struct svcxprt_rdma *listen_xprt = new_cma_id->context; | ||
537 | struct svcxprt_rdma *newxprt; | ||
538 | |||
539 | /* Create a new transport */ | ||
540 | newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); | ||
541 | if (!newxprt) { | ||
542 | dprintk("svcrdma: failed to create new transport\n"); | ||
543 | return; | ||
544 | } | ||
545 | newxprt->sc_cm_id = new_cma_id; | ||
546 | new_cma_id->context = newxprt; | ||
547 | dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", | ||
548 | newxprt, newxprt->sc_cm_id, listen_xprt); | ||
549 | |||
550 | /* | ||
551 | * Enqueue the new transport on the accept queue of the listening | ||
552 | * transport | ||
553 | */ | ||
554 | spin_lock_bh(&listen_xprt->sc_lock); | ||
555 | list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q); | ||
556 | spin_unlock_bh(&listen_xprt->sc_lock); | ||
557 | |||
558 | /* | ||
559 | * Can't use svc_xprt_received here because we are not on a | ||
560 | * rqstp thread | ||
561 | */ | ||
562 | set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags); | ||
563 | svc_xprt_enqueue(&listen_xprt->sc_xprt); | ||
564 | } | ||
565 | |||
566 | /* | ||
567 | * Handles events generated on the listening endpoint. These events will be | ||
568 | * either be incoming connect requests or adapter removal events. | ||
569 | */ | ||
570 | static int rdma_listen_handler(struct rdma_cm_id *cma_id, | ||
571 | struct rdma_cm_event *event) | ||
572 | { | ||
573 | struct svcxprt_rdma *xprt = cma_id->context; | ||
574 | int ret = 0; | ||
575 | |||
576 | switch (event->event) { | ||
577 | case RDMA_CM_EVENT_CONNECT_REQUEST: | ||
578 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " | ||
579 | "event=%d\n", cma_id, cma_id->context, event->event); | ||
580 | handle_connect_req(cma_id); | ||
581 | break; | ||
582 | |||
583 | case RDMA_CM_EVENT_ESTABLISHED: | ||
584 | /* Accept complete */ | ||
585 | dprintk("svcrdma: Connection completed on LISTEN xprt=%p, " | ||
586 | "cm_id=%p\n", xprt, cma_id); | ||
587 | break; | ||
588 | |||
589 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | ||
590 | dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", | ||
591 | xprt, cma_id); | ||
592 | if (xprt) | ||
593 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
594 | break; | ||
595 | |||
596 | default: | ||
597 | dprintk("svcrdma: Unexpected event on listening endpoint %p, " | ||
598 | "event=%d\n", cma_id, event->event); | ||
599 | break; | ||
600 | } | ||
601 | |||
602 | return ret; | ||
603 | } | ||
604 | |||
605 | static int rdma_cma_handler(struct rdma_cm_id *cma_id, | ||
606 | struct rdma_cm_event *event) | ||
607 | { | ||
608 | struct svc_xprt *xprt = cma_id->context; | ||
609 | struct svcxprt_rdma *rdma = | ||
610 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
611 | switch (event->event) { | ||
612 | case RDMA_CM_EVENT_ESTABLISHED: | ||
613 | /* Accept complete */ | ||
614 | dprintk("svcrdma: Connection completed on DTO xprt=%p, " | ||
615 | "cm_id=%p\n", xprt, cma_id); | ||
616 | clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags); | ||
617 | svc_xprt_enqueue(xprt); | ||
618 | break; | ||
619 | case RDMA_CM_EVENT_DISCONNECTED: | ||
620 | dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n", | ||
621 | xprt, cma_id); | ||
622 | if (xprt) { | ||
623 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
624 | svc_xprt_enqueue(xprt); | ||
625 | } | ||
626 | break; | ||
627 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | ||
628 | dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, " | ||
629 | "event=%d\n", cma_id, xprt, event->event); | ||
630 | if (xprt) { | ||
631 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
632 | svc_xprt_enqueue(xprt); | ||
633 | } | ||
634 | break; | ||
635 | default: | ||
636 | dprintk("svcrdma: Unexpected event on DTO endpoint %p, " | ||
637 | "event=%d\n", cma_id, event->event); | ||
638 | break; | ||
639 | } | ||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | /* | ||
644 | * Create a listening RDMA service endpoint. | ||
645 | */ | ||
646 | static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | ||
647 | struct sockaddr *sa, int salen, | ||
648 | int flags) | ||
649 | { | ||
650 | struct rdma_cm_id *listen_id; | ||
651 | struct svcxprt_rdma *cma_xprt; | ||
652 | struct svc_xprt *xprt; | ||
653 | int ret; | ||
654 | |||
655 | dprintk("svcrdma: Creating RDMA socket\n"); | ||
656 | |||
657 | cma_xprt = rdma_create_xprt(serv, 1); | ||
658 | if (!cma_xprt) | ||
659 | return ERR_PTR(ENOMEM); | ||
660 | xprt = &cma_xprt->sc_xprt; | ||
661 | |||
662 | listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); | ||
663 | if (IS_ERR(listen_id)) { | ||
664 | rdma_destroy_xprt(cma_xprt); | ||
665 | dprintk("svcrdma: rdma_create_id failed = %ld\n", | ||
666 | PTR_ERR(listen_id)); | ||
667 | return (void *)listen_id; | ||
668 | } | ||
669 | ret = rdma_bind_addr(listen_id, sa); | ||
670 | if (ret) { | ||
671 | rdma_destroy_xprt(cma_xprt); | ||
672 | rdma_destroy_id(listen_id); | ||
673 | dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); | ||
674 | return ERR_PTR(ret); | ||
675 | } | ||
676 | cma_xprt->sc_cm_id = listen_id; | ||
677 | |||
678 | ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); | ||
679 | if (ret) { | ||
680 | rdma_destroy_id(listen_id); | ||
681 | rdma_destroy_xprt(cma_xprt); | ||
682 | dprintk("svcrdma: rdma_listen failed = %d\n", ret); | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * We need to use the address from the cm_id in case the | ||
687 | * caller specified 0 for the port number. | ||
688 | */ | ||
689 | sa = (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr; | ||
690 | svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen); | ||
691 | |||
692 | return &cma_xprt->sc_xprt; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * This is the xpo_recvfrom function for listening endpoints. Its | ||
697 | * purpose is to accept incoming connections. The CMA callback handler | ||
698 | * has already created a new transport and attached it to the new CMA | ||
699 | * ID. | ||
700 | * | ||
701 | * There is a queue of pending connections hung on the listening | ||
702 | * transport. This queue contains the new svc_xprt structure. This | ||
703 | * function takes svc_xprt structures off the accept_q and completes | ||
704 | * the connection. | ||
705 | */ | ||
706 | static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | ||
707 | { | ||
708 | struct svcxprt_rdma *listen_rdma; | ||
709 | struct svcxprt_rdma *newxprt = NULL; | ||
710 | struct rdma_conn_param conn_param; | ||
711 | struct ib_qp_init_attr qp_attr; | ||
712 | struct ib_device_attr devattr; | ||
713 | struct sockaddr *sa; | ||
714 | int ret; | ||
715 | int i; | ||
716 | |||
717 | listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
718 | clear_bit(XPT_CONN, &xprt->xpt_flags); | ||
719 | /* Get the next entry off the accept list */ | ||
720 | spin_lock_bh(&listen_rdma->sc_lock); | ||
721 | if (!list_empty(&listen_rdma->sc_accept_q)) { | ||
722 | newxprt = list_entry(listen_rdma->sc_accept_q.next, | ||
723 | struct svcxprt_rdma, sc_accept_q); | ||
724 | list_del_init(&newxprt->sc_accept_q); | ||
725 | } | ||
726 | if (!list_empty(&listen_rdma->sc_accept_q)) | ||
727 | set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags); | ||
728 | spin_unlock_bh(&listen_rdma->sc_lock); | ||
729 | if (!newxprt) | ||
730 | return NULL; | ||
731 | |||
732 | dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", | ||
733 | newxprt, newxprt->sc_cm_id); | ||
734 | |||
735 | ret = ib_query_device(newxprt->sc_cm_id->device, &devattr); | ||
736 | if (ret) { | ||
737 | dprintk("svcrdma: could not query device attributes on " | ||
738 | "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret); | ||
739 | goto errout; | ||
740 | } | ||
741 | |||
742 | /* Qualify the transport resource defaults with the | ||
743 | * capabilities of this particular device */ | ||
744 | newxprt->sc_max_sge = min((size_t)devattr.max_sge, | ||
745 | (size_t)RPCSVC_MAXPAGES); | ||
746 | newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr, | ||
747 | (size_t)svcrdma_max_requests); | ||
748 | newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; | ||
749 | |||
750 | newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom, | ||
751 | (size_t)svcrdma_ord); | ||
752 | |||
753 | newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); | ||
754 | if (IS_ERR(newxprt->sc_pd)) { | ||
755 | dprintk("svcrdma: error creating PD for connect request\n"); | ||
756 | goto errout; | ||
757 | } | ||
758 | newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device, | ||
759 | sq_comp_handler, | ||
760 | cq_event_handler, | ||
761 | newxprt, | ||
762 | newxprt->sc_sq_depth, | ||
763 | 0); | ||
764 | if (IS_ERR(newxprt->sc_sq_cq)) { | ||
765 | dprintk("svcrdma: error creating SQ CQ for connect request\n"); | ||
766 | goto errout; | ||
767 | } | ||
768 | newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device, | ||
769 | rq_comp_handler, | ||
770 | cq_event_handler, | ||
771 | newxprt, | ||
772 | newxprt->sc_max_requests, | ||
773 | 0); | ||
774 | if (IS_ERR(newxprt->sc_rq_cq)) { | ||
775 | dprintk("svcrdma: error creating RQ CQ for connect request\n"); | ||
776 | goto errout; | ||
777 | } | ||
778 | |||
779 | memset(&qp_attr, 0, sizeof qp_attr); | ||
780 | qp_attr.event_handler = qp_event_handler; | ||
781 | qp_attr.qp_context = &newxprt->sc_xprt; | ||
782 | qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; | ||
783 | qp_attr.cap.max_recv_wr = newxprt->sc_max_requests; | ||
784 | qp_attr.cap.max_send_sge = newxprt->sc_max_sge; | ||
785 | qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; | ||
786 | qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; | ||
787 | qp_attr.qp_type = IB_QPT_RC; | ||
788 | qp_attr.send_cq = newxprt->sc_sq_cq; | ||
789 | qp_attr.recv_cq = newxprt->sc_rq_cq; | ||
790 | dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n" | ||
791 | " cm_id->device=%p, sc_pd->device=%p\n" | ||
792 | " cap.max_send_wr = %d\n" | ||
793 | " cap.max_recv_wr = %d\n" | ||
794 | " cap.max_send_sge = %d\n" | ||
795 | " cap.max_recv_sge = %d\n", | ||
796 | newxprt->sc_cm_id, newxprt->sc_pd, | ||
797 | newxprt->sc_cm_id->device, newxprt->sc_pd->device, | ||
798 | qp_attr.cap.max_send_wr, | ||
799 | qp_attr.cap.max_recv_wr, | ||
800 | qp_attr.cap.max_send_sge, | ||
801 | qp_attr.cap.max_recv_sge); | ||
802 | |||
803 | ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); | ||
804 | if (ret) { | ||
805 | /* | ||
806 | * XXX: This is a hack. We need a xx_request_qp interface | ||
807 | * that will adjust the qp_attr's with a best-effort | ||
808 | * number | ||
809 | */ | ||
810 | qp_attr.cap.max_send_sge -= 2; | ||
811 | qp_attr.cap.max_recv_sge -= 2; | ||
812 | ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, | ||
813 | &qp_attr); | ||
814 | if (ret) { | ||
815 | dprintk("svcrdma: failed to create QP, ret=%d\n", ret); | ||
816 | goto errout; | ||
817 | } | ||
818 | newxprt->sc_max_sge = qp_attr.cap.max_send_sge; | ||
819 | newxprt->sc_max_sge = qp_attr.cap.max_recv_sge; | ||
820 | newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; | ||
821 | newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; | ||
822 | } | ||
823 | newxprt->sc_qp = newxprt->sc_cm_id->qp; | ||
824 | |||
825 | /* Register all of physical memory */ | ||
826 | newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, | ||
827 | IB_ACCESS_LOCAL_WRITE | | ||
828 | IB_ACCESS_REMOTE_WRITE); | ||
829 | if (IS_ERR(newxprt->sc_phys_mr)) { | ||
830 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); | ||
831 | goto errout; | ||
832 | } | ||
833 | |||
834 | /* Post receive buffers */ | ||
835 | for (i = 0; i < newxprt->sc_max_requests; i++) { | ||
836 | ret = svc_rdma_post_recv(newxprt); | ||
837 | if (ret) { | ||
838 | dprintk("svcrdma: failure posting receive buffers\n"); | ||
839 | goto errout; | ||
840 | } | ||
841 | } | ||
842 | |||
843 | /* Swap out the handler */ | ||
844 | newxprt->sc_cm_id->event_handler = rdma_cma_handler; | ||
845 | |||
846 | /* Accept Connection */ | ||
847 | set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); | ||
848 | memset(&conn_param, 0, sizeof conn_param); | ||
849 | conn_param.responder_resources = 0; | ||
850 | conn_param.initiator_depth = newxprt->sc_ord; | ||
851 | ret = rdma_accept(newxprt->sc_cm_id, &conn_param); | ||
852 | if (ret) { | ||
853 | dprintk("svcrdma: failed to accept new connection, ret=%d\n", | ||
854 | ret); | ||
855 | goto errout; | ||
856 | } | ||
857 | |||
858 | dprintk("svcrdma: new connection %p accepted with the following " | ||
859 | "attributes:\n" | ||
860 | " local_ip : %d.%d.%d.%d\n" | ||
861 | " local_port : %d\n" | ||
862 | " remote_ip : %d.%d.%d.%d\n" | ||
863 | " remote_port : %d\n" | ||
864 | " max_sge : %d\n" | ||
865 | " sq_depth : %d\n" | ||
866 | " max_requests : %d\n" | ||
867 | " ord : %d\n", | ||
868 | newxprt, | ||
869 | NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
870 | route.addr.src_addr)->sin_addr.s_addr), | ||
871 | ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
872 | route.addr.src_addr)->sin_port), | ||
873 | NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
874 | route.addr.dst_addr)->sin_addr.s_addr), | ||
875 | ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
876 | route.addr.dst_addr)->sin_port), | ||
877 | newxprt->sc_max_sge, | ||
878 | newxprt->sc_sq_depth, | ||
879 | newxprt->sc_max_requests, | ||
880 | newxprt->sc_ord); | ||
881 | |||
882 | /* Set the local and remote addresses in the transport */ | ||
883 | sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; | ||
884 | svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); | ||
885 | sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; | ||
886 | svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); | ||
887 | |||
888 | ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); | ||
889 | ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP); | ||
890 | return &newxprt->sc_xprt; | ||
891 | |||
892 | errout: | ||
893 | dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); | ||
894 | rdma_destroy_id(newxprt->sc_cm_id); | ||
895 | rdma_destroy_xprt(newxprt); | ||
896 | return NULL; | ||
897 | } | ||
898 | |||
899 | /* | ||
900 | * Post an RQ WQE to the RQ when the rqst is being released. This | ||
901 | * effectively returns an RQ credit to the client. The rq_xprt_ctxt | ||
902 | * will be null if the request is deferred due to an RDMA_READ or the | ||
903 | * transport had no data ready (EAGAIN). Note that an RPC deferred in | ||
904 | * svc_process will still return the credit, this is because the data | ||
905 | * is copied and no longer consume a WQE/WC. | ||
906 | */ | ||
907 | static void svc_rdma_release_rqst(struct svc_rqst *rqstp) | ||
908 | { | ||
909 | int err; | ||
910 | struct svcxprt_rdma *rdma = | ||
911 | container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt); | ||
912 | if (rqstp->rq_xprt_ctxt) { | ||
913 | BUG_ON(rqstp->rq_xprt_ctxt != rdma); | ||
914 | err = svc_rdma_post_recv(rdma); | ||
915 | if (err) | ||
916 | dprintk("svcrdma: failed to post an RQ WQE error=%d\n", | ||
917 | err); | ||
918 | } | ||
919 | rqstp->rq_xprt_ctxt = NULL; | ||
920 | } | ||
921 | |||
922 | /* Disable data ready events for this connection */ | ||
923 | static void svc_rdma_detach(struct svc_xprt *xprt) | ||
924 | { | ||
925 | struct svcxprt_rdma *rdma = | ||
926 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
927 | unsigned long flags; | ||
928 | |||
929 | dprintk("svc: svc_rdma_detach(%p)\n", xprt); | ||
930 | /* | ||
931 | * Shutdown the connection. This will ensure we don't get any | ||
932 | * more events from the provider. | ||
933 | */ | ||
934 | rdma_disconnect(rdma->sc_cm_id); | ||
935 | rdma_destroy_id(rdma->sc_cm_id); | ||
936 | |||
937 | /* We may already be on the DTO list */ | ||
938 | spin_lock_irqsave(&dto_lock, flags); | ||
939 | if (!list_empty(&rdma->sc_dto_q)) | ||
940 | list_del_init(&rdma->sc_dto_q); | ||
941 | spin_unlock_irqrestore(&dto_lock, flags); | ||
942 | } | ||
943 | |||
944 | static void svc_rdma_free(struct svc_xprt *xprt) | ||
945 | { | ||
946 | struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; | ||
947 | dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); | ||
948 | rdma_destroy_xprt(rdma); | ||
949 | kfree(rdma); | ||
950 | } | ||
951 | |||
952 | static void rdma_destroy_xprt(struct svcxprt_rdma *xprt) | ||
953 | { | ||
954 | if (xprt->sc_qp && !IS_ERR(xprt->sc_qp)) | ||
955 | ib_destroy_qp(xprt->sc_qp); | ||
956 | |||
957 | if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq)) | ||
958 | ib_destroy_cq(xprt->sc_sq_cq); | ||
959 | |||
960 | if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq)) | ||
961 | ib_destroy_cq(xprt->sc_rq_cq); | ||
962 | |||
963 | if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr)) | ||
964 | ib_dereg_mr(xprt->sc_phys_mr); | ||
965 | |||
966 | if (xprt->sc_pd && !IS_ERR(xprt->sc_pd)) | ||
967 | ib_dealloc_pd(xprt->sc_pd); | ||
968 | |||
969 | destroy_context_cache(xprt->sc_ctxt_head); | ||
970 | } | ||
971 | |||
972 | static int svc_rdma_has_wspace(struct svc_xprt *xprt) | ||
973 | { | ||
974 | struct svcxprt_rdma *rdma = | ||
975 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
976 | |||
977 | /* | ||
978 | * If there are fewer SQ WR available than required to send a | ||
979 | * simple response, return false. | ||
980 | */ | ||
981 | if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3)) | ||
982 | return 0; | ||
983 | |||
984 | /* | ||
985 | * ...or there are already waiters on the SQ, | ||
986 | * return false. | ||
987 | */ | ||
988 | if (waitqueue_active(&rdma->sc_send_wait)) | ||
989 | return 0; | ||
990 | |||
991 | /* Otherwise return true. */ | ||
992 | return 1; | ||
993 | } | ||
994 | |||
995 | int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) | ||
996 | { | ||
997 | struct ib_send_wr *bad_wr; | ||
998 | int ret; | ||
999 | |||
1000 | if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) | ||
1001 | return 0; | ||
1002 | |||
1003 | BUG_ON(wr->send_flags != IB_SEND_SIGNALED); | ||
1004 | BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != | ||
1005 | wr->opcode); | ||
1006 | /* If the SQ is full, wait until an SQ entry is available */ | ||
1007 | while (1) { | ||
1008 | spin_lock_bh(&xprt->sc_lock); | ||
1009 | if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { | ||
1010 | spin_unlock_bh(&xprt->sc_lock); | ||
1011 | atomic_inc(&rdma_stat_sq_starve); | ||
1012 | /* See if we can reap some SQ WR */ | ||
1013 | sq_cq_reap(xprt); | ||
1014 | |||
1015 | /* Wait until SQ WR available if SQ still full */ | ||
1016 | wait_event(xprt->sc_send_wait, | ||
1017 | atomic_read(&xprt->sc_sq_count) < | ||
1018 | xprt->sc_sq_depth); | ||
1019 | continue; | ||
1020 | } | ||
1021 | /* Bumped used SQ WR count and post */ | ||
1022 | ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); | ||
1023 | if (!ret) | ||
1024 | atomic_inc(&xprt->sc_sq_count); | ||
1025 | else | ||
1026 | dprintk("svcrdma: failed to post SQ WR rc=%d, " | ||
1027 | "sc_sq_count=%d, sc_sq_depth=%d\n", | ||
1028 | ret, atomic_read(&xprt->sc_sq_count), | ||
1029 | xprt->sc_sq_depth); | ||
1030 | spin_unlock_bh(&xprt->sc_lock); | ||
1031 | break; | ||
1032 | } | ||
1033 | return ret; | ||
1034 | } | ||
1035 | |||
1036 | int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | ||
1037 | enum rpcrdma_errcode err) | ||
1038 | { | ||
1039 | struct ib_send_wr err_wr; | ||
1040 | struct ib_sge sge; | ||
1041 | struct page *p; | ||
1042 | struct svc_rdma_op_ctxt *ctxt; | ||
1043 | u32 *va; | ||
1044 | int length; | ||
1045 | int ret; | ||
1046 | |||
1047 | p = svc_rdma_get_page(); | ||
1048 | va = page_address(p); | ||
1049 | |||
1050 | /* XDR encode error */ | ||
1051 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); | ||
1052 | |||
1053 | /* Prepare SGE for local address */ | ||
1054 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, | ||
1055 | p, 0, PAGE_SIZE, DMA_FROM_DEVICE); | ||
1056 | sge.lkey = xprt->sc_phys_mr->lkey; | ||
1057 | sge.length = length; | ||
1058 | |||
1059 | ctxt = svc_rdma_get_context(xprt); | ||
1060 | ctxt->count = 1; | ||
1061 | ctxt->pages[0] = p; | ||
1062 | |||
1063 | /* Prepare SEND WR */ | ||
1064 | memset(&err_wr, 0, sizeof err_wr); | ||
1065 | ctxt->wr_op = IB_WR_SEND; | ||
1066 | err_wr.wr_id = (unsigned long)ctxt; | ||
1067 | err_wr.sg_list = &sge; | ||
1068 | err_wr.num_sge = 1; | ||
1069 | err_wr.opcode = IB_WR_SEND; | ||
1070 | err_wr.send_flags = IB_SEND_SIGNALED; | ||
1071 | |||
1072 | /* Post It */ | ||
1073 | ret = svc_rdma_send(xprt, &err_wr); | ||
1074 | if (ret) { | ||
1075 | dprintk("svcrdma: Error posting send = %d\n", ret); | ||
1076 | svc_rdma_put_context(ctxt, 1); | ||
1077 | } | ||
1078 | |||
1079 | return ret; | ||
1080 | } | ||