diff options
60 files changed, 5450 insertions, 1664 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 91082e60d289..6cae13718925 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
| @@ -2247,7 +2247,7 @@ P: J. Bruce Fields | |||
| 2247 | M: bfields@fieldses.org | 2247 | M: bfields@fieldses.org |
| 2248 | P: Neil Brown | 2248 | P: Neil Brown |
| 2249 | M: neilb@suse.de | 2249 | M: neilb@suse.de |
| 2250 | L: nfs@lists.sourceforge.net | 2250 | L: linux-nfs@vger.kernel.org |
| 2251 | W: http://nfs.sourceforge.net/ | 2251 | W: http://nfs.sourceforge.net/ |
| 2252 | S: Supported | 2252 | S: Supported |
| 2253 | 2253 | ||
diff --git a/fs/Kconfig b/fs/Kconfig index 219ec06a8c7e..987b5d7cb21a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -1674,6 +1674,8 @@ config NFSD | |||
| 1674 | select CRYPTO_MD5 if NFSD_V4 | 1674 | select CRYPTO_MD5 if NFSD_V4 |
| 1675 | select CRYPTO if NFSD_V4 | 1675 | select CRYPTO if NFSD_V4 |
| 1676 | select FS_POSIX_ACL if NFSD_V4 | 1676 | select FS_POSIX_ACL if NFSD_V4 |
| 1677 | select PROC_FS if NFSD_V4 | ||
| 1678 | select PROC_FS if SUNRPC_GSS | ||
| 1677 | help | 1679 | help |
| 1678 | If you want your Linux box to act as an NFS *server*, so that other | 1680 | If you want your Linux box to act as an NFS *server*, so that other |
| 1679 | computers on your local network which support NFS can access certain | 1681 | computers on your local network which support NFS can access certain |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 572601e98dcd..ca6b16fc3101 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
| @@ -34,10 +34,10 @@ static DEFINE_MUTEX(nlm_host_mutex); | |||
| 34 | 34 | ||
| 35 | static void nlm_gc_hosts(void); | 35 | static void nlm_gc_hosts(void); |
| 36 | static struct nsm_handle * __nsm_find(const struct sockaddr_in *, | 36 | static struct nsm_handle * __nsm_find(const struct sockaddr_in *, |
| 37 | const char *, int, int); | 37 | const char *, unsigned int, int); |
| 38 | static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, | 38 | static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, |
| 39 | const char *hostname, | 39 | const char *hostname, |
| 40 | int hostname_len); | 40 | unsigned int hostname_len); |
| 41 | 41 | ||
| 42 | /* | 42 | /* |
| 43 | * Common host lookup routine for server & client | 43 | * Common host lookup routine for server & client |
| @@ -45,7 +45,8 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, | |||
| 45 | static struct nlm_host * | 45 | static struct nlm_host * |
| 46 | nlm_lookup_host(int server, const struct sockaddr_in *sin, | 46 | nlm_lookup_host(int server, const struct sockaddr_in *sin, |
| 47 | int proto, int version, const char *hostname, | 47 | int proto, int version, const char *hostname, |
| 48 | int hostname_len, const struct sockaddr_in *ssin) | 48 | unsigned int hostname_len, |
| 49 | const struct sockaddr_in *ssin) | ||
| 49 | { | 50 | { |
| 50 | struct hlist_head *chain; | 51 | struct hlist_head *chain; |
| 51 | struct hlist_node *pos; | 52 | struct hlist_node *pos; |
| @@ -176,7 +177,7 @@ nlm_destroy_host(struct nlm_host *host) | |||
| 176 | */ | 177 | */ |
| 177 | struct nlm_host * | 178 | struct nlm_host * |
| 178 | nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, | 179 | nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, |
| 179 | const char *hostname, int hostname_len) | 180 | const char *hostname, unsigned int hostname_len) |
| 180 | { | 181 | { |
| 181 | struct sockaddr_in ssin = {0}; | 182 | struct sockaddr_in ssin = {0}; |
| 182 | 183 | ||
| @@ -189,7 +190,7 @@ nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, | |||
| 189 | */ | 190 | */ |
| 190 | struct nlm_host * | 191 | struct nlm_host * |
| 191 | nlmsvc_lookup_host(struct svc_rqst *rqstp, | 192 | nlmsvc_lookup_host(struct svc_rqst *rqstp, |
| 192 | const char *hostname, int hostname_len) | 193 | const char *hostname, unsigned int hostname_len) |
| 193 | { | 194 | { |
| 194 | struct sockaddr_in ssin = {0}; | 195 | struct sockaddr_in ssin = {0}; |
| 195 | 196 | ||
| @@ -307,7 +308,8 @@ void nlm_release_host(struct nlm_host *host) | |||
| 307 | * Release all resources held by that peer. | 308 | * Release all resources held by that peer. |
| 308 | */ | 309 | */ |
| 309 | void nlm_host_rebooted(const struct sockaddr_in *sin, | 310 | void nlm_host_rebooted(const struct sockaddr_in *sin, |
| 310 | const char *hostname, int hostname_len, | 311 | const char *hostname, |
| 312 | unsigned int hostname_len, | ||
| 311 | u32 new_state) | 313 | u32 new_state) |
| 312 | { | 314 | { |
| 313 | struct hlist_head *chain; | 315 | struct hlist_head *chain; |
| @@ -377,8 +379,13 @@ nlm_shutdown_hosts(void) | |||
| 377 | /* First, make all hosts eligible for gc */ | 379 | /* First, make all hosts eligible for gc */ |
| 378 | dprintk("lockd: nuking all hosts...\n"); | 380 | dprintk("lockd: nuking all hosts...\n"); |
| 379 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { | 381 | for (chain = nlm_hosts; chain < nlm_hosts + NLM_HOST_NRHASH; ++chain) { |
| 380 | hlist_for_each_entry(host, pos, chain, h_hash) | 382 | hlist_for_each_entry(host, pos, chain, h_hash) { |
| 381 | host->h_expires = jiffies - 1; | 383 | host->h_expires = jiffies - 1; |
| 384 | if (host->h_rpcclnt) { | ||
| 385 | rpc_shutdown_client(host->h_rpcclnt); | ||
| 386 | host->h_rpcclnt = NULL; | ||
| 387 | } | ||
| 388 | } | ||
| 382 | } | 389 | } |
| 383 | 390 | ||
| 384 | /* Then, perform a garbage collection pass */ | 391 | /* Then, perform a garbage collection pass */ |
| @@ -449,7 +456,7 @@ static DEFINE_MUTEX(nsm_mutex); | |||
| 449 | 456 | ||
| 450 | static struct nsm_handle * | 457 | static struct nsm_handle * |
| 451 | __nsm_find(const struct sockaddr_in *sin, | 458 | __nsm_find(const struct sockaddr_in *sin, |
| 452 | const char *hostname, int hostname_len, | 459 | const char *hostname, unsigned int hostname_len, |
| 453 | int create) | 460 | int create) |
| 454 | { | 461 | { |
| 455 | struct nsm_handle *nsm = NULL; | 462 | struct nsm_handle *nsm = NULL; |
| @@ -503,7 +510,8 @@ out: | |||
| 503 | } | 510 | } |
| 504 | 511 | ||
| 505 | static struct nsm_handle * | 512 | static struct nsm_handle * |
| 506 | nsm_find(const struct sockaddr_in *sin, const char *hostname, int hostname_len) | 513 | nsm_find(const struct sockaddr_in *sin, const char *hostname, |
| 514 | unsigned int hostname_len) | ||
| 507 | { | 515 | { |
| 508 | return __nsm_find(sin, hostname, hostname_len, 1); | 516 | return __nsm_find(sin, hostname, hostname_len, 1); |
| 509 | } | 517 | } |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 82e2192a0d5c..08226464e563 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
| @@ -219,19 +219,6 @@ lockd(struct svc_rqst *rqstp) | |||
| 219 | module_put_and_exit(0); | 219 | module_put_and_exit(0); |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | |||
| 223 | static int find_socket(struct svc_serv *serv, int proto) | ||
| 224 | { | ||
| 225 | struct svc_sock *svsk; | ||
| 226 | int found = 0; | ||
| 227 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) | ||
| 228 | if (svsk->sk_sk->sk_protocol == proto) { | ||
| 229 | found = 1; | ||
| 230 | break; | ||
| 231 | } | ||
| 232 | return found; | ||
| 233 | } | ||
| 234 | |||
| 235 | /* | 222 | /* |
| 236 | * Make any sockets that are needed but not present. | 223 | * Make any sockets that are needed but not present. |
| 237 | * If nlm_udpport or nlm_tcpport were set as module | 224 | * If nlm_udpport or nlm_tcpport were set as module |
| @@ -240,17 +227,25 @@ static int find_socket(struct svc_serv *serv, int proto) | |||
| 240 | static int make_socks(struct svc_serv *serv, int proto) | 227 | static int make_socks(struct svc_serv *serv, int proto) |
| 241 | { | 228 | { |
| 242 | static int warned; | 229 | static int warned; |
| 230 | struct svc_xprt *xprt; | ||
| 243 | int err = 0; | 231 | int err = 0; |
| 244 | 232 | ||
| 245 | if (proto == IPPROTO_UDP || nlm_udpport) | 233 | if (proto == IPPROTO_UDP || nlm_udpport) { |
| 246 | if (!find_socket(serv, IPPROTO_UDP)) | 234 | xprt = svc_find_xprt(serv, "udp", 0, 0); |
| 247 | err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport, | 235 | if (!xprt) |
| 248 | SVC_SOCK_DEFAULTS); | 236 | err = svc_create_xprt(serv, "udp", nlm_udpport, |
| 249 | if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) | 237 | SVC_SOCK_DEFAULTS); |
| 250 | if (!find_socket(serv, IPPROTO_TCP)) | 238 | else |
| 251 | err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport, | 239 | svc_xprt_put(xprt); |
| 252 | SVC_SOCK_DEFAULTS); | 240 | } |
| 253 | 241 | if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) { | |
| 242 | xprt = svc_find_xprt(serv, "tcp", 0, 0); | ||
| 243 | if (!xprt) | ||
| 244 | err = svc_create_xprt(serv, "tcp", nlm_tcpport, | ||
| 245 | SVC_SOCK_DEFAULTS); | ||
| 246 | else | ||
| 247 | svc_xprt_put(xprt); | ||
| 248 | } | ||
| 254 | if (err >= 0) { | 249 | if (err >= 0) { |
| 255 | warned = 0; | 250 | warned = 0; |
| 256 | err = 0; | 251 | err = 0; |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index bf27b6c6cb6b..385437e3387d 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
| @@ -84,6 +84,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 84 | { | 84 | { |
| 85 | struct nlm_host *host; | 85 | struct nlm_host *host; |
| 86 | struct nlm_file *file; | 86 | struct nlm_file *file; |
| 87 | int rc = rpc_success; | ||
| 87 | 88 | ||
| 88 | dprintk("lockd: TEST4 called\n"); | 89 | dprintk("lockd: TEST4 called\n"); |
| 89 | resp->cookie = argp->cookie; | 90 | resp->cookie = argp->cookie; |
| @@ -91,7 +92,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 91 | /* Don't accept test requests during grace period */ | 92 | /* Don't accept test requests during grace period */ |
| 92 | if (nlmsvc_grace_period) { | 93 | if (nlmsvc_grace_period) { |
| 93 | resp->status = nlm_lck_denied_grace_period; | 94 | resp->status = nlm_lck_denied_grace_period; |
| 94 | return rpc_success; | 95 | return rc; |
| 95 | } | 96 | } |
| 96 | 97 | ||
| 97 | /* Obtain client and file */ | 98 | /* Obtain client and file */ |
| @@ -101,12 +102,13 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 101 | /* Now check for conflicting locks */ | 102 | /* Now check for conflicting locks */ |
| 102 | resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie); | 103 | resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie); |
| 103 | if (resp->status == nlm_drop_reply) | 104 | if (resp->status == nlm_drop_reply) |
| 104 | return rpc_drop_reply; | 105 | rc = rpc_drop_reply; |
| 106 | else | ||
| 107 | dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); | ||
| 105 | 108 | ||
| 106 | dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); | ||
| 107 | nlm_release_host(host); | 109 | nlm_release_host(host); |
| 108 | nlm_release_file(file); | 110 | nlm_release_file(file); |
| 109 | return rpc_success; | 111 | return rc; |
| 110 | } | 112 | } |
| 111 | 113 | ||
| 112 | static __be32 | 114 | static __be32 |
| @@ -115,6 +117,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 115 | { | 117 | { |
| 116 | struct nlm_host *host; | 118 | struct nlm_host *host; |
| 117 | struct nlm_file *file; | 119 | struct nlm_file *file; |
| 120 | int rc = rpc_success; | ||
| 118 | 121 | ||
| 119 | dprintk("lockd: LOCK called\n"); | 122 | dprintk("lockd: LOCK called\n"); |
| 120 | 123 | ||
| @@ -123,7 +126,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 123 | /* Don't accept new lock requests during grace period */ | 126 | /* Don't accept new lock requests during grace period */ |
| 124 | if (nlmsvc_grace_period && !argp->reclaim) { | 127 | if (nlmsvc_grace_period && !argp->reclaim) { |
| 125 | resp->status = nlm_lck_denied_grace_period; | 128 | resp->status = nlm_lck_denied_grace_period; |
| 126 | return rpc_success; | 129 | return rc; |
| 127 | } | 130 | } |
| 128 | 131 | ||
| 129 | /* Obtain client and file */ | 132 | /* Obtain client and file */ |
| @@ -146,12 +149,13 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 146 | resp->status = nlmsvc_lock(rqstp, file, &argp->lock, | 149 | resp->status = nlmsvc_lock(rqstp, file, &argp->lock, |
| 147 | argp->block, &argp->cookie); | 150 | argp->block, &argp->cookie); |
| 148 | if (resp->status == nlm_drop_reply) | 151 | if (resp->status == nlm_drop_reply) |
| 149 | return rpc_drop_reply; | 152 | rc = rpc_drop_reply; |
| 153 | else | ||
| 154 | dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); | ||
| 150 | 155 | ||
| 151 | dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); | ||
| 152 | nlm_release_host(host); | 156 | nlm_release_host(host); |
| 153 | nlm_release_file(file); | 157 | nlm_release_file(file); |
| 154 | return rpc_success; | 158 | return rc; |
| 155 | } | 159 | } |
| 156 | 160 | ||
| 157 | static __be32 | 161 | static __be32 |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index d120ec39bcb0..2f4d8fa66689 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
| @@ -501,25 +501,29 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
| 501 | block, block->b_flags, block->b_fl); | 501 | block, block->b_flags, block->b_fl); |
| 502 | if (block->b_flags & B_TIMED_OUT) { | 502 | if (block->b_flags & B_TIMED_OUT) { |
| 503 | nlmsvc_unlink_block(block); | 503 | nlmsvc_unlink_block(block); |
| 504 | return nlm_lck_denied; | 504 | ret = nlm_lck_denied; |
| 505 | goto out; | ||
| 505 | } | 506 | } |
| 506 | if (block->b_flags & B_GOT_CALLBACK) { | 507 | if (block->b_flags & B_GOT_CALLBACK) { |
| 508 | nlmsvc_unlink_block(block); | ||
| 507 | if (block->b_fl != NULL | 509 | if (block->b_fl != NULL |
| 508 | && block->b_fl->fl_type != F_UNLCK) { | 510 | && block->b_fl->fl_type != F_UNLCK) { |
| 509 | lock->fl = *block->b_fl; | 511 | lock->fl = *block->b_fl; |
| 510 | goto conf_lock; | 512 | goto conf_lock; |
| 511 | } | 513 | } else { |
| 512 | else { | 514 | ret = nlm_granted; |
| 513 | nlmsvc_unlink_block(block); | 515 | goto out; |
| 514 | return nlm_granted; | ||
| 515 | } | 516 | } |
| 516 | } | 517 | } |
| 517 | return nlm_drop_reply; | 518 | ret = nlm_drop_reply; |
| 519 | goto out; | ||
| 518 | } | 520 | } |
| 519 | 521 | ||
| 520 | error = vfs_test_lock(file->f_file, &lock->fl); | 522 | error = vfs_test_lock(file->f_file, &lock->fl); |
| 521 | if (error == -EINPROGRESS) | 523 | if (error == -EINPROGRESS) { |
| 522 | return nlmsvc_defer_lock_rqst(rqstp, block); | 524 | ret = nlmsvc_defer_lock_rqst(rqstp, block); |
| 525 | goto out; | ||
| 526 | } | ||
| 523 | if (error) { | 527 | if (error) { |
| 524 | ret = nlm_lck_denied_nolocks; | 528 | ret = nlm_lck_denied_nolocks; |
| 525 | goto out; | 529 | goto out; |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 9cd5c8b37593..88379cc6e0b1 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
| @@ -113,6 +113,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 113 | { | 113 | { |
| 114 | struct nlm_host *host; | 114 | struct nlm_host *host; |
| 115 | struct nlm_file *file; | 115 | struct nlm_file *file; |
| 116 | int rc = rpc_success; | ||
| 116 | 117 | ||
| 117 | dprintk("lockd: TEST called\n"); | 118 | dprintk("lockd: TEST called\n"); |
| 118 | resp->cookie = argp->cookie; | 119 | resp->cookie = argp->cookie; |
| @@ -120,7 +121,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 120 | /* Don't accept test requests during grace period */ | 121 | /* Don't accept test requests during grace period */ |
| 121 | if (nlmsvc_grace_period) { | 122 | if (nlmsvc_grace_period) { |
| 122 | resp->status = nlm_lck_denied_grace_period; | 123 | resp->status = nlm_lck_denied_grace_period; |
| 123 | return rpc_success; | 124 | return rc; |
| 124 | } | 125 | } |
| 125 | 126 | ||
| 126 | /* Obtain client and file */ | 127 | /* Obtain client and file */ |
| @@ -130,13 +131,14 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 130 | /* Now check for conflicting locks */ | 131 | /* Now check for conflicting locks */ |
| 131 | resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie)); | 132 | resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie)); |
| 132 | if (resp->status == nlm_drop_reply) | 133 | if (resp->status == nlm_drop_reply) |
| 133 | return rpc_drop_reply; | 134 | rc = rpc_drop_reply; |
| 135 | else | ||
| 136 | dprintk("lockd: TEST status %d vers %d\n", | ||
| 137 | ntohl(resp->status), rqstp->rq_vers); | ||
| 134 | 138 | ||
| 135 | dprintk("lockd: TEST status %d vers %d\n", | ||
| 136 | ntohl(resp->status), rqstp->rq_vers); | ||
| 137 | nlm_release_host(host); | 139 | nlm_release_host(host); |
| 138 | nlm_release_file(file); | 140 | nlm_release_file(file); |
| 139 | return rpc_success; | 141 | return rc; |
| 140 | } | 142 | } |
| 141 | 143 | ||
| 142 | static __be32 | 144 | static __be32 |
| @@ -145,6 +147,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 145 | { | 147 | { |
| 146 | struct nlm_host *host; | 148 | struct nlm_host *host; |
| 147 | struct nlm_file *file; | 149 | struct nlm_file *file; |
| 150 | int rc = rpc_success; | ||
| 148 | 151 | ||
| 149 | dprintk("lockd: LOCK called\n"); | 152 | dprintk("lockd: LOCK called\n"); |
| 150 | 153 | ||
| @@ -153,7 +156,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 153 | /* Don't accept new lock requests during grace period */ | 156 | /* Don't accept new lock requests during grace period */ |
| 154 | if (nlmsvc_grace_period && !argp->reclaim) { | 157 | if (nlmsvc_grace_period && !argp->reclaim) { |
| 155 | resp->status = nlm_lck_denied_grace_period; | 158 | resp->status = nlm_lck_denied_grace_period; |
| 156 | return rpc_success; | 159 | return rc; |
| 157 | } | 160 | } |
| 158 | 161 | ||
| 159 | /* Obtain client and file */ | 162 | /* Obtain client and file */ |
| @@ -176,12 +179,13 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 176 | resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, | 179 | resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, |
| 177 | argp->block, &argp->cookie)); | 180 | argp->block, &argp->cookie)); |
| 178 | if (resp->status == nlm_drop_reply) | 181 | if (resp->status == nlm_drop_reply) |
| 179 | return rpc_drop_reply; | 182 | rc = rpc_drop_reply; |
| 183 | else | ||
| 184 | dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); | ||
| 180 | 185 | ||
| 181 | dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); | ||
| 182 | nlm_release_host(host); | 186 | nlm_release_host(host); |
| 183 | nlm_release_file(file); | 187 | nlm_release_file(file); |
| 184 | return rpc_success; | 188 | return rc; |
| 185 | } | 189 | } |
| 186 | 190 | ||
| 187 | static __be32 | 191 | static __be32 |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 84ebba33b98d..dbbefbcd6712 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
| @@ -87,7 +87,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, | |||
| 87 | unsigned int hash; | 87 | unsigned int hash; |
| 88 | __be32 nfserr; | 88 | __be32 nfserr; |
| 89 | 89 | ||
| 90 | nlm_debug_print_fh("nlm_file_lookup", f); | 90 | nlm_debug_print_fh("nlm_lookup_file", f); |
| 91 | 91 | ||
| 92 | hash = file_hash(f); | 92 | hash = file_hash(f); |
| 93 | 93 | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 9b6bbf1b9787..bd185a572a23 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
| @@ -119,8 +119,8 @@ int nfs_callback_up(void) | |||
| 119 | if (!serv) | 119 | if (!serv) |
| 120 | goto out_err; | 120 | goto out_err; |
| 121 | 121 | ||
| 122 | ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport, | 122 | ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport, |
| 123 | SVC_SOCK_ANONYMOUS); | 123 | SVC_SOCK_ANONYMOUS); |
| 124 | if (ret <= 0) | 124 | if (ret <= 0) |
| 125 | goto out_destroy; | 125 | goto out_destroy; |
| 126 | nfs_callback_tcpport = ret; | 126 | nfs_callback_tcpport = ret; |
diff --git a/include/linux/nfsd/auth.h b/fs/nfsd/auth.h index 0fb9f7212195..78b3c0e93822 100644 --- a/include/linux/nfsd/auth.h +++ b/fs/nfsd/auth.h | |||
| @@ -1,6 +1,4 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * include/linux/nfsd/auth.h | ||
| 3 | * | ||
| 4 | * nfsd-specific authentication stuff. | 2 | * nfsd-specific authentication stuff. |
| 5 | * uid/gid mapping not yet implemented. | 3 | * uid/gid mapping not yet implemented. |
| 6 | * | 4 | * |
| @@ -10,8 +8,6 @@ | |||
| 10 | #ifndef LINUX_NFSD_AUTH_H | 8 | #ifndef LINUX_NFSD_AUTH_H |
| 11 | #define LINUX_NFSD_AUTH_H | 9 | #define LINUX_NFSD_AUTH_H |
| 12 | 10 | ||
| 13 | #ifdef __KERNEL__ | ||
| 14 | |||
| 15 | #define nfsd_luid(rq, uid) ((u32)(uid)) | 11 | #define nfsd_luid(rq, uid) ((u32)(uid)) |
| 16 | #define nfsd_lgid(rq, gid) ((u32)(gid)) | 12 | #define nfsd_lgid(rq, gid) ((u32)(gid)) |
| 17 | #define nfsd_ruid(rq, uid) ((u32)(uid)) | 13 | #define nfsd_ruid(rq, uid) ((u32)(uid)) |
| @@ -23,5 +19,4 @@ | |||
| 23 | */ | 19 | */ |
| 24 | int nfsd_setuser(struct svc_rqst *, struct svc_export *); | 20 | int nfsd_setuser(struct svc_rqst *, struct svc_export *); |
| 25 | 21 | ||
| 26 | #endif /* __KERNEL__ */ | ||
| 27 | #endif /* LINUX_NFSD_AUTH_H */ | 22 | #endif /* LINUX_NFSD_AUTH_H */ |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 66d0aeb32a47..79b4bf812960 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
| @@ -1357,8 +1357,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) | |||
| 1357 | mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); | 1357 | mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL); |
| 1358 | 1358 | ||
| 1359 | exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); | 1359 | exp = rqst_exp_find(rqstp, FSID_NUM, fsidv); |
| 1360 | if (PTR_ERR(exp) == -ENOENT) | ||
| 1361 | return nfserr_perm; | ||
| 1362 | if (IS_ERR(exp)) | 1360 | if (IS_ERR(exp)) |
| 1363 | return nfserrno(PTR_ERR(exp)); | 1361 | return nfserrno(PTR_ERR(exp)); |
| 1364 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); | 1362 | rv = fh_compose(fhp, exp, exp->ex_dentry, NULL); |
| @@ -1637,13 +1635,19 @@ exp_verify_string(char *cp, int max) | |||
| 1637 | /* | 1635 | /* |
| 1638 | * Initialize the exports module. | 1636 | * Initialize the exports module. |
| 1639 | */ | 1637 | */ |
| 1640 | void | 1638 | int |
| 1641 | nfsd_export_init(void) | 1639 | nfsd_export_init(void) |
| 1642 | { | 1640 | { |
| 1641 | int rv; | ||
| 1643 | dprintk("nfsd: initializing export module.\n"); | 1642 | dprintk("nfsd: initializing export module.\n"); |
| 1644 | 1643 | ||
| 1645 | cache_register(&svc_export_cache); | 1644 | rv = cache_register(&svc_export_cache); |
| 1646 | cache_register(&svc_expkey_cache); | 1645 | if (rv) |
| 1646 | return rv; | ||
| 1647 | rv = cache_register(&svc_expkey_cache); | ||
| 1648 | if (rv) | ||
| 1649 | cache_unregister(&svc_export_cache); | ||
| 1650 | return rv; | ||
| 1647 | 1651 | ||
| 1648 | } | 1652 | } |
| 1649 | 1653 | ||
| @@ -1670,10 +1674,8 @@ nfsd_export_shutdown(void) | |||
| 1670 | 1674 | ||
| 1671 | exp_writelock(); | 1675 | exp_writelock(); |
| 1672 | 1676 | ||
| 1673 | if (cache_unregister(&svc_expkey_cache)) | 1677 | cache_unregister(&svc_expkey_cache); |
| 1674 | printk(KERN_ERR "nfsd: failed to unregister expkey cache\n"); | 1678 | cache_unregister(&svc_export_cache); |
| 1675 | if (cache_unregister(&svc_export_cache)) | ||
| 1676 | printk(KERN_ERR "nfsd: failed to unregister export cache\n"); | ||
| 1677 | svcauth_unix_purge(); | 1679 | svcauth_unix_purge(); |
| 1678 | 1680 | ||
| 1679 | exp_writeunlock(); | 1681 | exp_writeunlock(); |
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 0e5fa11e6b44..1c3b7654e966 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c | |||
| @@ -221,12 +221,17 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, | |||
| 221 | struct nfsd3_getaclres *resp) | 221 | struct nfsd3_getaclres *resp) |
| 222 | { | 222 | { |
| 223 | struct dentry *dentry = resp->fh.fh_dentry; | 223 | struct dentry *dentry = resp->fh.fh_dentry; |
| 224 | struct inode *inode = dentry->d_inode; | 224 | struct inode *inode; |
| 225 | struct kvec *head = rqstp->rq_res.head; | 225 | struct kvec *head = rqstp->rq_res.head; |
| 226 | unsigned int base; | 226 | unsigned int base; |
| 227 | int n; | 227 | int n; |
| 228 | int w; | 228 | int w; |
| 229 | 229 | ||
| 230 | /* | ||
| 231 | * Since this is version 2, the check for nfserr in | ||
| 232 | * nfsd_dispatch actually ensures the following cannot happen. | ||
| 233 | * However, it seems fragile to depend on that. | ||
| 234 | */ | ||
| 230 | if (dentry == NULL || dentry->d_inode == NULL) | 235 | if (dentry == NULL || dentry->d_inode == NULL) |
| 231 | return 0; | 236 | return 0; |
| 232 | inode = dentry->d_inode; | 237 | inode = dentry->d_inode; |
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index f917fd25858a..d7647f70e02b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/sunrpc/svc.h> | 21 | #include <linux/sunrpc/svc.h> |
| 22 | #include <linux/nfsd/nfsd.h> | 22 | #include <linux/nfsd/nfsd.h> |
| 23 | #include <linux/nfsd/xdr3.h> | 23 | #include <linux/nfsd/xdr3.h> |
| 24 | #include "auth.h" | ||
| 24 | 25 | ||
| 25 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 26 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
| 26 | 27 | ||
| @@ -88,10 +89,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp) | |||
| 88 | * no slashes or null bytes. | 89 | * no slashes or null bytes. |
| 89 | */ | 90 | */ |
| 90 | static __be32 * | 91 | static __be32 * |
| 91 | decode_filename(__be32 *p, char **namp, int *lenp) | 92 | decode_filename(__be32 *p, char **namp, unsigned int *lenp) |
| 92 | { | 93 | { |
| 93 | char *name; | 94 | char *name; |
| 94 | int i; | 95 | unsigned int i; |
| 95 | 96 | ||
| 96 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { | 97 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { |
| 97 | for (i = 0, name = *namp; i < *lenp; i++, name++) { | 98 | for (i = 0, name = *namp; i < *lenp; i++, name++) { |
| @@ -452,8 +453,7 @@ int | |||
| 452 | nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, | 453 | nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, |
| 453 | struct nfsd3_symlinkargs *args) | 454 | struct nfsd3_symlinkargs *args) |
| 454 | { | 455 | { |
| 455 | unsigned int len; | 456 | unsigned int len, avail; |
| 456 | int avail; | ||
| 457 | char *old, *new; | 457 | char *old, *new; |
| 458 | struct kvec *vec; | 458 | struct kvec *vec; |
| 459 | 459 | ||
| @@ -486,7 +486,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, | |||
| 486 | /* now copy next page if there is one */ | 486 | /* now copy next page if there is one */ |
| 487 | if (len && !avail && rqstp->rq_arg.page_len) { | 487 | if (len && !avail && rqstp->rq_arg.page_len) { |
| 488 | avail = rqstp->rq_arg.page_len; | 488 | avail = rqstp->rq_arg.page_len; |
| 489 | if (avail > PAGE_SIZE) avail = PAGE_SIZE; | 489 | if (avail > PAGE_SIZE) |
| 490 | avail = PAGE_SIZE; | ||
| 490 | old = page_address(rqstp->rq_arg.pages[0]); | 491 | old = page_address(rqstp->rq_arg.pages[0]); |
| 491 | } | 492 | } |
| 492 | while (len && avail && *old) { | 493 | while (len && avail && *old) { |
| @@ -816,11 +817,11 @@ static __be32 * | |||
| 816 | encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, | 817 | encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, |
| 817 | struct svc_fh *fhp) | 818 | struct svc_fh *fhp) |
| 818 | { | 819 | { |
| 819 | p = encode_post_op_attr(cd->rqstp, p, fhp); | 820 | p = encode_post_op_attr(cd->rqstp, p, fhp); |
| 820 | *p++ = xdr_one; /* yes, a file handle follows */ | 821 | *p++ = xdr_one; /* yes, a file handle follows */ |
| 821 | p = encode_fh(p, fhp); | 822 | p = encode_fh(p, fhp); |
| 822 | fh_put(fhp); | 823 | fh_put(fhp); |
| 823 | return p; | 824 | return p; |
| 824 | } | 825 | } |
| 825 | 826 | ||
| 826 | static int | 827 | static int |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 9d536a8cb379..aae2b29ae2c9 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
| @@ -350,30 +350,6 @@ static struct rpc_version * nfs_cb_version[] = { | |||
| 350 | static int do_probe_callback(void *data) | 350 | static int do_probe_callback(void *data) |
| 351 | { | 351 | { |
| 352 | struct nfs4_client *clp = data; | 352 | struct nfs4_client *clp = data; |
| 353 | struct nfs4_callback *cb = &clp->cl_callback; | ||
| 354 | struct rpc_message msg = { | ||
| 355 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | ||
| 356 | .rpc_argp = clp, | ||
| 357 | }; | ||
| 358 | int status; | ||
| 359 | |||
| 360 | status = rpc_call_sync(cb->cb_client, &msg, RPC_TASK_SOFT); | ||
| 361 | |||
| 362 | if (status) { | ||
| 363 | rpc_shutdown_client(cb->cb_client); | ||
| 364 | cb->cb_client = NULL; | ||
| 365 | } else | ||
| 366 | atomic_set(&cb->cb_set, 1); | ||
| 367 | put_nfs4_client(clp); | ||
| 368 | return 0; | ||
| 369 | } | ||
| 370 | |||
| 371 | /* | ||
| 372 | * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... | ||
| 373 | */ | ||
| 374 | void | ||
| 375 | nfsd4_probe_callback(struct nfs4_client *clp) | ||
| 376 | { | ||
| 377 | struct sockaddr_in addr; | 353 | struct sockaddr_in addr; |
| 378 | struct nfs4_callback *cb = &clp->cl_callback; | 354 | struct nfs4_callback *cb = &clp->cl_callback; |
| 379 | struct rpc_timeout timeparms = { | 355 | struct rpc_timeout timeparms = { |
| @@ -390,13 +366,15 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
| 390 | .timeout = &timeparms, | 366 | .timeout = &timeparms, |
| 391 | .program = program, | 367 | .program = program, |
| 392 | .version = nfs_cb_version[1]->number, | 368 | .version = nfs_cb_version[1]->number, |
| 393 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ | 369 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ |
| 394 | .flags = (RPC_CLNT_CREATE_NOPING), | 370 | .flags = (RPC_CLNT_CREATE_NOPING), |
| 395 | }; | 371 | }; |
| 396 | struct task_struct *t; | 372 | struct rpc_message msg = { |
| 397 | 373 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | |
| 398 | if (atomic_read(&cb->cb_set)) | 374 | .rpc_argp = clp, |
| 399 | return; | 375 | }; |
| 376 | struct rpc_clnt *client; | ||
| 377 | int status; | ||
| 400 | 378 | ||
| 401 | /* Initialize address */ | 379 | /* Initialize address */ |
| 402 | memset(&addr, 0, sizeof(addr)); | 380 | memset(&addr, 0, sizeof(addr)); |
| @@ -416,29 +394,50 @@ nfsd4_probe_callback(struct nfs4_client *clp) | |||
| 416 | program->stats->program = program; | 394 | program->stats->program = program; |
| 417 | 395 | ||
| 418 | /* Create RPC client */ | 396 | /* Create RPC client */ |
| 419 | cb->cb_client = rpc_create(&args); | 397 | client = rpc_create(&args); |
| 420 | if (IS_ERR(cb->cb_client)) { | 398 | if (IS_ERR(client)) { |
| 421 | dprintk("NFSD: couldn't create callback client\n"); | 399 | dprintk("NFSD: couldn't create callback client\n"); |
| 400 | status = PTR_ERR(client); | ||
| 422 | goto out_err; | 401 | goto out_err; |
| 423 | } | 402 | } |
| 424 | 403 | ||
| 404 | status = rpc_call_sync(client, &msg, RPC_TASK_SOFT); | ||
| 405 | |||
| 406 | if (status) | ||
| 407 | goto out_release_client; | ||
| 408 | |||
| 409 | cb->cb_client = client; | ||
| 410 | atomic_set(&cb->cb_set, 1); | ||
| 411 | put_nfs4_client(clp); | ||
| 412 | return 0; | ||
| 413 | out_release_client: | ||
| 414 | rpc_shutdown_client(client); | ||
| 415 | out_err: | ||
| 416 | put_nfs4_client(clp); | ||
| 417 | dprintk("NFSD: warning: no callback path to client %.*s\n", | ||
| 418 | (int)clp->cl_name.len, clp->cl_name.data); | ||
| 419 | return status; | ||
| 420 | } | ||
| 421 | |||
| 422 | /* | ||
| 423 | * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... | ||
| 424 | */ | ||
| 425 | void | ||
| 426 | nfsd4_probe_callback(struct nfs4_client *clp) | ||
| 427 | { | ||
| 428 | struct task_struct *t; | ||
| 429 | |||
| 430 | BUG_ON(atomic_read(&clp->cl_callback.cb_set)); | ||
| 431 | |||
| 425 | /* the task holds a reference to the nfs4_client struct */ | 432 | /* the task holds a reference to the nfs4_client struct */ |
| 426 | atomic_inc(&clp->cl_count); | 433 | atomic_inc(&clp->cl_count); |
| 427 | 434 | ||
| 428 | t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe"); | 435 | t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe"); |
| 429 | 436 | ||
| 430 | if (IS_ERR(t)) | 437 | if (IS_ERR(t)) |
| 431 | goto out_release_clp; | 438 | atomic_dec(&clp->cl_count); |
| 432 | 439 | ||
| 433 | return; | 440 | return; |
| 434 | |||
| 435 | out_release_clp: | ||
| 436 | atomic_dec(&clp->cl_count); | ||
| 437 | rpc_shutdown_client(cb->cb_client); | ||
| 438 | out_err: | ||
| 439 | cb->cb_client = NULL; | ||
| 440 | dprintk("NFSD: warning: no callback path to client %.*s\n", | ||
| 441 | (int)clp->cl_name.len, clp->cl_name.data); | ||
| 442 | } | 441 | } |
| 443 | 442 | ||
| 444 | /* | 443 | /* |
| @@ -458,9 +457,6 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
| 458 | int retries = 1; | 457 | int retries = 1; |
| 459 | int status = 0; | 458 | int status = 0; |
| 460 | 459 | ||
| 461 | if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt) | ||
| 462 | return; | ||
| 463 | |||
| 464 | cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ | 460 | cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ |
| 465 | cbr->cbr_dp = dp; | 461 | cbr->cbr_dp = dp; |
| 466 | 462 | ||
| @@ -469,6 +465,7 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
| 469 | switch (status) { | 465 | switch (status) { |
| 470 | case -EIO: | 466 | case -EIO: |
| 471 | /* Network partition? */ | 467 | /* Network partition? */ |
| 468 | atomic_set(&clp->cl_callback.cb_set, 0); | ||
| 472 | case -EBADHANDLE: | 469 | case -EBADHANDLE: |
| 473 | case -NFS4ERR_BAD_STATEID: | 470 | case -NFS4ERR_BAD_STATEID: |
| 474 | /* Race: client probably got cb_recall | 471 | /* Race: client probably got cb_recall |
| @@ -481,11 +478,10 @@ nfsd4_cb_recall(struct nfs4_delegation *dp) | |||
| 481 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); | 478 | status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); |
| 482 | } | 479 | } |
| 483 | out_put_cred: | 480 | out_put_cred: |
| 484 | if (status == -EIO) | 481 | /* |
| 485 | atomic_set(&clp->cl_callback.cb_set, 0); | 482 | * Success or failure, now we're either waiting for lease expiration |
| 486 | /* Success or failure, now we're either waiting for lease expiration | 483 | * or deleg_return. |
| 487 | * or deleg_return. */ | 484 | */ |
| 488 | dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count)); | ||
| 489 | put_nfs4_client(clp); | 485 | put_nfs4_client(clp); |
| 490 | nfs4_put_delegation(dp); | 486 | nfs4_put_delegation(dp); |
| 491 | return; | 487 | return; |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 4c0c683ce07a..996bd88b75ba 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
| @@ -255,13 +255,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) | |||
| 255 | goto out; | 255 | goto out; |
| 256 | if (len == 0) | 256 | if (len == 0) |
| 257 | set_bit(CACHE_NEGATIVE, &ent.h.flags); | 257 | set_bit(CACHE_NEGATIVE, &ent.h.flags); |
| 258 | else { | 258 | else if (len >= IDMAP_NAMESZ) |
| 259 | if (error >= IDMAP_NAMESZ) { | 259 | goto out; |
| 260 | error = -EINVAL; | 260 | else |
| 261 | goto out; | ||
| 262 | } | ||
| 263 | memcpy(ent.name, buf1, sizeof(ent.name)); | 261 | memcpy(ent.name, buf1, sizeof(ent.name)); |
| 264 | } | ||
| 265 | error = -ENOMEM; | 262 | error = -ENOMEM; |
| 266 | res = idtoname_update(&ent, res); | 263 | res = idtoname_update(&ent, res); |
| 267 | if (res == NULL) | 264 | if (res == NULL) |
| @@ -467,20 +464,25 @@ nametoid_update(struct ent *new, struct ent *old) | |||
| 467 | * Exported API | 464 | * Exported API |
| 468 | */ | 465 | */ |
| 469 | 466 | ||
| 470 | void | 467 | int |
| 471 | nfsd_idmap_init(void) | 468 | nfsd_idmap_init(void) |
| 472 | { | 469 | { |
| 473 | cache_register(&idtoname_cache); | 470 | int rv; |
| 474 | cache_register(&nametoid_cache); | 471 | |
| 472 | rv = cache_register(&idtoname_cache); | ||
| 473 | if (rv) | ||
| 474 | return rv; | ||
| 475 | rv = cache_register(&nametoid_cache); | ||
| 476 | if (rv) | ||
| 477 | cache_unregister(&idtoname_cache); | ||
| 478 | return rv; | ||
| 475 | } | 479 | } |
| 476 | 480 | ||
| 477 | void | 481 | void |
| 478 | nfsd_idmap_shutdown(void) | 482 | nfsd_idmap_shutdown(void) |
| 479 | { | 483 | { |
| 480 | if (cache_unregister(&idtoname_cache)) | 484 | cache_unregister(&idtoname_cache); |
| 481 | printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n"); | 485 | cache_unregister(&nametoid_cache); |
| 482 | if (cache_unregister(&nametoid_cache)) | ||
| 483 | printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n"); | ||
| 484 | } | 486 | } |
| 485 | 487 | ||
| 486 | /* | 488 | /* |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 18ead1790bb3..c593db047d8b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -750,7 +750,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 750 | cstate->current_fh.fh_export, | 750 | cstate->current_fh.fh_export, |
| 751 | cstate->current_fh.fh_dentry, buf, | 751 | cstate->current_fh.fh_dentry, buf, |
| 752 | &count, verify->ve_bmval, | 752 | &count, verify->ve_bmval, |
| 753 | rqstp); | 753 | rqstp, 0); |
| 754 | 754 | ||
| 755 | /* this means that nfsd4_encode_fattr() ran out of space */ | 755 | /* this means that nfsd4_encode_fattr() ran out of space */ |
| 756 | if (status == nfserr_resource && count == 0) | 756 | if (status == nfserr_resource && count == 0) |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 31673cd251c3..f6744bc03dae 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
| @@ -61,7 +61,6 @@ static time_t lease_time = 90; /* default lease time */ | |||
| 61 | static time_t user_lease_time = 90; | 61 | static time_t user_lease_time = 90; |
| 62 | static time_t boot_time; | 62 | static time_t boot_time; |
| 63 | static int in_grace = 1; | 63 | static int in_grace = 1; |
| 64 | static u32 current_clientid = 1; | ||
| 65 | static u32 current_ownerid = 1; | 64 | static u32 current_ownerid = 1; |
| 66 | static u32 current_fileid = 1; | 65 | static u32 current_fileid = 1; |
| 67 | static u32 current_delegid = 1; | 66 | static u32 current_delegid = 1; |
| @@ -340,21 +339,20 @@ STALE_CLIENTID(clientid_t *clid) | |||
| 340 | * This type of memory management is somewhat inefficient, but we use it | 339 | * This type of memory management is somewhat inefficient, but we use it |
| 341 | * anyway since SETCLIENTID is not a common operation. | 340 | * anyway since SETCLIENTID is not a common operation. |
| 342 | */ | 341 | */ |
| 343 | static inline struct nfs4_client * | 342 | static struct nfs4_client *alloc_client(struct xdr_netobj name) |
| 344 | alloc_client(struct xdr_netobj name) | ||
| 345 | { | 343 | { |
| 346 | struct nfs4_client *clp; | 344 | struct nfs4_client *clp; |
| 347 | 345 | ||
| 348 | if ((clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) { | 346 | clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); |
| 349 | if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) { | 347 | if (clp == NULL) |
| 350 | memcpy(clp->cl_name.data, name.data, name.len); | 348 | return NULL; |
| 351 | clp->cl_name.len = name.len; | 349 | clp->cl_name.data = kmalloc(name.len, GFP_KERNEL); |
| 352 | } | 350 | if (clp->cl_name.data == NULL) { |
| 353 | else { | 351 | kfree(clp); |
| 354 | kfree(clp); | 352 | return NULL; |
| 355 | clp = NULL; | ||
| 356 | } | ||
| 357 | } | 353 | } |
| 354 | memcpy(clp->cl_name.data, name.data, name.len); | ||
| 355 | clp->cl_name.len = name.len; | ||
| 358 | return clp; | 356 | return clp; |
| 359 | } | 357 | } |
| 360 | 358 | ||
| @@ -363,8 +361,11 @@ shutdown_callback_client(struct nfs4_client *clp) | |||
| 363 | { | 361 | { |
| 364 | struct rpc_clnt *clnt = clp->cl_callback.cb_client; | 362 | struct rpc_clnt *clnt = clp->cl_callback.cb_client; |
| 365 | 363 | ||
| 366 | /* shutdown rpc client, ending any outstanding recall rpcs */ | ||
| 367 | if (clnt) { | 364 | if (clnt) { |
| 365 | /* | ||
| 366 | * Callback threads take a reference on the client, so there | ||
| 367 | * should be no outstanding callbacks at this point. | ||
| 368 | */ | ||
| 368 | clp->cl_callback.cb_client = NULL; | 369 | clp->cl_callback.cb_client = NULL; |
| 369 | rpc_shutdown_client(clnt); | 370 | rpc_shutdown_client(clnt); |
| 370 | } | 371 | } |
| @@ -422,12 +423,13 @@ expire_client(struct nfs4_client *clp) | |||
| 422 | put_nfs4_client(clp); | 423 | put_nfs4_client(clp); |
| 423 | } | 424 | } |
| 424 | 425 | ||
| 425 | static struct nfs4_client * | 426 | static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) |
| 426 | create_client(struct xdr_netobj name, char *recdir) { | 427 | { |
| 427 | struct nfs4_client *clp; | 428 | struct nfs4_client *clp; |
| 428 | 429 | ||
| 429 | if (!(clp = alloc_client(name))) | 430 | clp = alloc_client(name); |
| 430 | goto out; | 431 | if (clp == NULL) |
| 432 | return NULL; | ||
| 431 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); | 433 | memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); |
| 432 | atomic_set(&clp->cl_count, 1); | 434 | atomic_set(&clp->cl_count, 1); |
| 433 | atomic_set(&clp->cl_callback.cb_set, 0); | 435 | atomic_set(&clp->cl_callback.cb_set, 0); |
| @@ -436,32 +438,30 @@ create_client(struct xdr_netobj name, char *recdir) { | |||
| 436 | INIT_LIST_HEAD(&clp->cl_openowners); | 438 | INIT_LIST_HEAD(&clp->cl_openowners); |
| 437 | INIT_LIST_HEAD(&clp->cl_delegations); | 439 | INIT_LIST_HEAD(&clp->cl_delegations); |
| 438 | INIT_LIST_HEAD(&clp->cl_lru); | 440 | INIT_LIST_HEAD(&clp->cl_lru); |
| 439 | out: | ||
| 440 | return clp; | 441 | return clp; |
| 441 | } | 442 | } |
| 442 | 443 | ||
| 443 | static void | 444 | static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) |
| 444 | copy_verf(struct nfs4_client *target, nfs4_verifier *source) { | 445 | { |
| 445 | memcpy(target->cl_verifier.data, source->data, sizeof(target->cl_verifier.data)); | 446 | memcpy(target->cl_verifier.data, source->data, |
| 447 | sizeof(target->cl_verifier.data)); | ||
| 446 | } | 448 | } |
| 447 | 449 | ||
| 448 | static void | 450 | static void copy_clid(struct nfs4_client *target, struct nfs4_client *source) |
| 449 | copy_clid(struct nfs4_client *target, struct nfs4_client *source) { | 451 | { |
| 450 | target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; | 452 | target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; |
| 451 | target->cl_clientid.cl_id = source->cl_clientid.cl_id; | 453 | target->cl_clientid.cl_id = source->cl_clientid.cl_id; |
| 452 | } | 454 | } |
| 453 | 455 | ||
| 454 | static void | 456 | static void copy_cred(struct svc_cred *target, struct svc_cred *source) |
| 455 | copy_cred(struct svc_cred *target, struct svc_cred *source) { | 457 | { |
| 456 | |||
| 457 | target->cr_uid = source->cr_uid; | 458 | target->cr_uid = source->cr_uid; |
| 458 | target->cr_gid = source->cr_gid; | 459 | target->cr_gid = source->cr_gid; |
| 459 | target->cr_group_info = source->cr_group_info; | 460 | target->cr_group_info = source->cr_group_info; |
| 460 | get_group_info(target->cr_group_info); | 461 | get_group_info(target->cr_group_info); |
| 461 | } | 462 | } |
| 462 | 463 | ||
| 463 | static inline int | 464 | static int same_name(const char *n1, const char *n2) |
| 464 | same_name(const char *n1, const char *n2) | ||
| 465 | { | 465 | { |
| 466 | return 0 == memcmp(n1, n2, HEXDIR_LEN); | 466 | return 0 == memcmp(n1, n2, HEXDIR_LEN); |
| 467 | } | 467 | } |
| @@ -485,26 +485,26 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) | |||
| 485 | return cr1->cr_uid == cr2->cr_uid; | 485 | return cr1->cr_uid == cr2->cr_uid; |
| 486 | } | 486 | } |
| 487 | 487 | ||
| 488 | static void | 488 | static void gen_clid(struct nfs4_client *clp) |
| 489 | gen_clid(struct nfs4_client *clp) { | 489 | { |
| 490 | static u32 current_clientid = 1; | ||
| 491 | |||
| 490 | clp->cl_clientid.cl_boot = boot_time; | 492 | clp->cl_clientid.cl_boot = boot_time; |
| 491 | clp->cl_clientid.cl_id = current_clientid++; | 493 | clp->cl_clientid.cl_id = current_clientid++; |
| 492 | } | 494 | } |
| 493 | 495 | ||
| 494 | static void | 496 | static void gen_confirm(struct nfs4_client *clp) |
| 495 | gen_confirm(struct nfs4_client *clp) { | 497 | { |
| 496 | struct timespec tv; | 498 | static u32 i; |
| 497 | u32 * p; | 499 | u32 *p; |
| 498 | 500 | ||
| 499 | tv = CURRENT_TIME; | ||
| 500 | p = (u32 *)clp->cl_confirm.data; | 501 | p = (u32 *)clp->cl_confirm.data; |
| 501 | *p++ = tv.tv_sec; | 502 | *p++ = get_seconds(); |
| 502 | *p++ = tv.tv_nsec; | 503 | *p++ = i++; |
| 503 | } | 504 | } |
| 504 | 505 | ||
| 505 | static int | 506 | static int check_name(struct xdr_netobj name) |
| 506 | check_name(struct xdr_netobj name) { | 507 | { |
| 507 | |||
| 508 | if (name.len == 0) | 508 | if (name.len == 0) |
| 509 | return 0; | 509 | return 0; |
| 510 | if (name.len > NFS4_OPAQUE_LIMIT) { | 510 | if (name.len > NFS4_OPAQUE_LIMIT) { |
| @@ -683,39 +683,6 @@ out_err: | |||
| 683 | return; | 683 | return; |
| 684 | } | 684 | } |
| 685 | 685 | ||
| 686 | /* | ||
| 687 | * RFC 3010 has a complex implmentation description of processing a | ||
| 688 | * SETCLIENTID request consisting of 5 bullets, labeled as | ||
| 689 | * CASE0 - CASE4 below. | ||
| 690 | * | ||
| 691 | * NOTES: | ||
| 692 | * callback information will be processed in a future patch | ||
| 693 | * | ||
| 694 | * an unconfirmed record is added when: | ||
| 695 | * NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record. | ||
| 696 | * CASE 1: confirmed record found with matching name, principal, | ||
| 697 | * verifier, and clientid. | ||
| 698 | * CASE 2: confirmed record found with matching name, principal, | ||
| 699 | * and there is no unconfirmed record with matching | ||
| 700 | * name and principal | ||
| 701 | * | ||
| 702 | * an unconfirmed record is replaced when: | ||
| 703 | * CASE 3: confirmed record found with matching name, principal, | ||
| 704 | * and an unconfirmed record is found with matching | ||
| 705 | * name, principal, and with clientid and | ||
| 706 | * confirm that does not match the confirmed record. | ||
| 707 | * CASE 4: there is no confirmed record with matching name and | ||
| 708 | * principal. there is an unconfirmed record with | ||
| 709 | * matching name, principal. | ||
| 710 | * | ||
| 711 | * an unconfirmed record is deleted when: | ||
| 712 | * CASE 1: an unconfirmed record that matches input name, verifier, | ||
| 713 | * and confirmed clientid. | ||
| 714 | * CASE 4: any unconfirmed records with matching name and principal | ||
| 715 | * that exist after an unconfirmed record has been replaced | ||
| 716 | * as described above. | ||
| 717 | * | ||
| 718 | */ | ||
| 719 | __be32 | 686 | __be32 |
| 720 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | 687 | nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, |
| 721 | struct nfsd4_setclientid *setclid) | 688 | struct nfsd4_setclientid *setclid) |
| @@ -748,11 +715,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 748 | nfs4_lock_state(); | 715 | nfs4_lock_state(); |
| 749 | conf = find_confirmed_client_by_str(dname, strhashval); | 716 | conf = find_confirmed_client_by_str(dname, strhashval); |
| 750 | if (conf) { | 717 | if (conf) { |
| 751 | /* | 718 | /* RFC 3530 14.2.33 CASE 0: */ |
| 752 | * CASE 0: | ||
| 753 | * clname match, confirmed, different principal | ||
| 754 | * or different ip_address | ||
| 755 | */ | ||
| 756 | status = nfserr_clid_inuse; | 719 | status = nfserr_clid_inuse; |
| 757 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) | 720 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred) |
| 758 | || conf->cl_addr != sin->sin_addr.s_addr) { | 721 | || conf->cl_addr != sin->sin_addr.s_addr) { |
| @@ -761,12 +724,17 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 761 | goto out; | 724 | goto out; |
| 762 | } | 725 | } |
| 763 | } | 726 | } |
| 727 | /* | ||
| 728 | * section 14.2.33 of RFC 3530 (under the heading "IMPLEMENTATION") | ||
| 729 | * has a description of SETCLIENTID request processing consisting | ||
| 730 | * of 5 bullet points, labeled as CASE0 - CASE4 below. | ||
| 731 | */ | ||
| 764 | unconf = find_unconfirmed_client_by_str(dname, strhashval); | 732 | unconf = find_unconfirmed_client_by_str(dname, strhashval); |
| 765 | status = nfserr_resource; | 733 | status = nfserr_resource; |
| 766 | if (!conf) { | 734 | if (!conf) { |
| 767 | /* | 735 | /* |
| 768 | * CASE 4: | 736 | * RFC 3530 14.2.33 CASE 4: |
| 769 | * placed first, because it is the normal case. | 737 | * placed first, because it is the normal case |
| 770 | */ | 738 | */ |
| 771 | if (unconf) | 739 | if (unconf) |
| 772 | expire_client(unconf); | 740 | expire_client(unconf); |
| @@ -776,17 +744,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 776 | gen_clid(new); | 744 | gen_clid(new); |
| 777 | } else if (same_verf(&conf->cl_verifier, &clverifier)) { | 745 | } else if (same_verf(&conf->cl_verifier, &clverifier)) { |
| 778 | /* | 746 | /* |
| 779 | * CASE 1: | 747 | * RFC 3530 14.2.33 CASE 1: |
| 780 | * cl_name match, confirmed, principal match | 748 | * probable callback update |
| 781 | * verifier match: probable callback update | ||
| 782 | * | ||
| 783 | * remove any unconfirmed nfs4_client with | ||
| 784 | * matching cl_name, cl_verifier, and cl_clientid | ||
| 785 | * | ||
| 786 | * create and insert an unconfirmed nfs4_client with same | ||
| 787 | * cl_name, cl_verifier, and cl_clientid as existing | ||
| 788 | * nfs4_client, but with the new callback info and a | ||
| 789 | * new cl_confirm | ||
| 790 | */ | 749 | */ |
| 791 | if (unconf) { | 750 | if (unconf) { |
| 792 | /* Note this is removing unconfirmed {*x***}, | 751 | /* Note this is removing unconfirmed {*x***}, |
| @@ -802,43 +761,25 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 802 | copy_clid(new, conf); | 761 | copy_clid(new, conf); |
| 803 | } else if (!unconf) { | 762 | } else if (!unconf) { |
| 804 | /* | 763 | /* |
| 805 | * CASE 2: | 764 | * RFC 3530 14.2.33 CASE 2: |
| 806 | * clname match, confirmed, principal match | 765 | * probable client reboot; state will be removed if |
| 807 | * verfier does not match | 766 | * confirmed. |
| 808 | * no unconfirmed. create a new unconfirmed nfs4_client | ||
| 809 | * using input clverifier, clname, and callback info | ||
| 810 | * and generate a new cl_clientid and cl_confirm. | ||
| 811 | */ | 767 | */ |
| 812 | new = create_client(clname, dname); | 768 | new = create_client(clname, dname); |
| 813 | if (new == NULL) | 769 | if (new == NULL) |
| 814 | goto out; | 770 | goto out; |
| 815 | gen_clid(new); | 771 | gen_clid(new); |
| 816 | } else if (!same_verf(&conf->cl_confirm, &unconf->cl_confirm)) { | 772 | } else { |
| 817 | /* | 773 | /* |
| 818 | * CASE3: | 774 | * RFC 3530 14.2.33 CASE 3: |
| 819 | * confirmed found (name, principal match) | 775 | * probable client reboot; state will be removed if |
| 820 | * confirmed verifier does not match input clverifier | 776 | * confirmed. |
| 821 | * | ||
| 822 | * unconfirmed found (name match) | ||
| 823 | * confirmed->cl_confirm != unconfirmed->cl_confirm | ||
| 824 | * | ||
| 825 | * remove unconfirmed. | ||
| 826 | * | ||
| 827 | * create an unconfirmed nfs4_client | ||
| 828 | * with same cl_name as existing confirmed nfs4_client, | ||
| 829 | * but with new callback info, new cl_clientid, | ||
| 830 | * new cl_verifier and a new cl_confirm | ||
| 831 | */ | 777 | */ |
| 832 | expire_client(unconf); | 778 | expire_client(unconf); |
| 833 | new = create_client(clname, dname); | 779 | new = create_client(clname, dname); |
| 834 | if (new == NULL) | 780 | if (new == NULL) |
| 835 | goto out; | 781 | goto out; |
| 836 | gen_clid(new); | 782 | gen_clid(new); |
| 837 | } else { | ||
| 838 | /* No cases hit !!! */ | ||
| 839 | status = nfserr_inval; | ||
| 840 | goto out; | ||
| 841 | |||
| 842 | } | 783 | } |
| 843 | copy_verf(new, &clverifier); | 784 | copy_verf(new, &clverifier); |
| 844 | new->cl_addr = sin->sin_addr.s_addr; | 785 | new->cl_addr = sin->sin_addr.s_addr; |
| @@ -857,11 +798,9 @@ out: | |||
| 857 | 798 | ||
| 858 | 799 | ||
| 859 | /* | 800 | /* |
| 860 | * RFC 3010 has a complex implmentation description of processing a | 801 | * Section 14.2.34 of RFC 3530 (under the heading "IMPLEMENTATION") has |
| 861 | * SETCLIENTID_CONFIRM request consisting of 4 bullets describing | 802 | * a description of SETCLIENTID_CONFIRM request processing consisting of 4 |
| 862 | * processing on a DRC miss, labeled as CASE1 - CASE4 below. | 803 | * bullets, labeled as CASE1 - CASE4 below. |
| 863 | * | ||
| 864 | * NOTE: callback information will be processed here in a future patch | ||
| 865 | */ | 804 | */ |
| 866 | __be32 | 805 | __be32 |
| 867 | nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | 806 | nfsd4_setclientid_confirm(struct svc_rqst *rqstp, |
| @@ -892,16 +831,16 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
| 892 | if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) | 831 | if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) |
| 893 | goto out; | 832 | goto out; |
| 894 | 833 | ||
| 895 | if ((conf && unconf) && | 834 | /* |
| 896 | (same_verf(&unconf->cl_confirm, &confirm)) && | 835 | * section 14.2.34 of RFC 3530 has a description of |
| 897 | (same_verf(&conf->cl_verifier, &unconf->cl_verifier)) && | 836 | * SETCLIENTID_CONFIRM request processing consisting |
| 898 | (same_name(conf->cl_recdir,unconf->cl_recdir)) && | 837 | * of 4 bullet points, labeled as CASE1 - CASE4 below. |
| 899 | (!same_verf(&conf->cl_confirm, &unconf->cl_confirm))) { | 838 | */ |
| 900 | /* CASE 1: | 839 | if (conf && unconf && same_verf(&confirm, &unconf->cl_confirm)) { |
| 901 | * unconf record that matches input clientid and input confirm. | 840 | /* |
| 902 | * conf record that matches input clientid. | 841 | * RFC 3530 14.2.34 CASE 1: |
| 903 | * conf and unconf records match names, verifiers | 842 | * callback update |
| 904 | */ | 843 | */ |
| 905 | if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) | 844 | if (!same_creds(&conf->cl_cred, &unconf->cl_cred)) |
| 906 | status = nfserr_clid_inuse; | 845 | status = nfserr_clid_inuse; |
| 907 | else { | 846 | else { |
| @@ -914,15 +853,11 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
| 914 | status = nfs_ok; | 853 | status = nfs_ok; |
| 915 | 854 | ||
| 916 | } | 855 | } |
| 917 | } else if ((conf && !unconf) || | 856 | } else if (conf && !unconf) { |
| 918 | ((conf && unconf) && | 857 | /* |
| 919 | (!same_verf(&conf->cl_verifier, &unconf->cl_verifier) || | 858 | * RFC 3530 14.2.34 CASE 2: |
| 920 | !same_name(conf->cl_recdir, unconf->cl_recdir)))) { | 859 | * probable retransmitted request; play it safe and |
| 921 | /* CASE 2: | 860 | * do nothing. |
| 922 | * conf record that matches input clientid. | ||
| 923 | * if unconf record matches input clientid, then | ||
| 924 | * unconf->cl_name or unconf->cl_verifier don't match the | ||
| 925 | * conf record. | ||
| 926 | */ | 861 | */ |
| 927 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) | 862 | if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) |
| 928 | status = nfserr_clid_inuse; | 863 | status = nfserr_clid_inuse; |
| @@ -930,10 +865,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
| 930 | status = nfs_ok; | 865 | status = nfs_ok; |
| 931 | } else if (!conf && unconf | 866 | } else if (!conf && unconf |
| 932 | && same_verf(&unconf->cl_confirm, &confirm)) { | 867 | && same_verf(&unconf->cl_confirm, &confirm)) { |
| 933 | /* CASE 3: | 868 | /* |
| 934 | * conf record not found. | 869 | * RFC 3530 14.2.34 CASE 3: |
| 935 | * unconf record found. | 870 | * Normal case; new or rebooted client: |
| 936 | * unconf->cl_confirm matches input confirm | ||
| 937 | */ | 871 | */ |
| 938 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { | 872 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) { |
| 939 | status = nfserr_clid_inuse; | 873 | status = nfserr_clid_inuse; |
| @@ -948,16 +882,15 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
| 948 | } | 882 | } |
| 949 | move_to_confirmed(unconf); | 883 | move_to_confirmed(unconf); |
| 950 | conf = unconf; | 884 | conf = unconf; |
| 885 | nfsd4_probe_callback(conf); | ||
| 951 | status = nfs_ok; | 886 | status = nfs_ok; |
| 952 | } | 887 | } |
| 953 | } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) | 888 | } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) |
| 954 | && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, | 889 | && (!unconf || (unconf && !same_verf(&unconf->cl_confirm, |
| 955 | &confirm)))) { | 890 | &confirm)))) { |
| 956 | /* CASE 4: | 891 | /* |
| 957 | * conf record not found, or if conf, conf->cl_confirm does not | 892 | * RFC 3530 14.2.34 CASE 4: |
| 958 | * match input confirm. | 893 | * Client probably hasn't noticed that we rebooted yet. |
| 959 | * unconf record not found, or if unconf, unconf->cl_confirm | ||
| 960 | * does not match input confirm. | ||
| 961 | */ | 894 | */ |
| 962 | status = nfserr_stale_clientid; | 895 | status = nfserr_stale_clientid; |
| 963 | } else { | 896 | } else { |
| @@ -965,8 +898,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
| 965 | status = nfserr_clid_inuse; | 898 | status = nfserr_clid_inuse; |
| 966 | } | 899 | } |
| 967 | out: | 900 | out: |
| 968 | if (!status) | ||
| 969 | nfsd4_probe_callback(conf); | ||
| 970 | nfs4_unlock_state(); | 901 | nfs4_unlock_state(); |
| 971 | return status; | 902 | return status; |
| 972 | } | 903 | } |
| @@ -1226,14 +1157,19 @@ find_file(struct inode *ino) | |||
| 1226 | return NULL; | 1157 | return NULL; |
| 1227 | } | 1158 | } |
| 1228 | 1159 | ||
| 1229 | static int access_valid(u32 x) | 1160 | static inline int access_valid(u32 x) |
| 1230 | { | 1161 | { |
| 1231 | return (x > 0 && x < 4); | 1162 | if (x < NFS4_SHARE_ACCESS_READ) |
| 1163 | return 0; | ||
| 1164 | if (x > NFS4_SHARE_ACCESS_BOTH) | ||
| 1165 | return 0; | ||
| 1166 | return 1; | ||
| 1232 | } | 1167 | } |
| 1233 | 1168 | ||
| 1234 | static int deny_valid(u32 x) | 1169 | static inline int deny_valid(u32 x) |
| 1235 | { | 1170 | { |
| 1236 | return (x >= 0 && x < 5); | 1171 | /* Note: unlike access bits, deny bits may be zero. */ |
| 1172 | return x <= NFS4_SHARE_DENY_BOTH; | ||
| 1237 | } | 1173 | } |
| 1238 | 1174 | ||
| 1239 | static void | 1175 | static void |
| @@ -2162,8 +2098,10 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
| 2162 | goto check_replay; | 2098 | goto check_replay; |
| 2163 | } | 2099 | } |
| 2164 | 2100 | ||
| 2101 | *stpp = stp; | ||
| 2102 | *sopp = sop = stp->st_stateowner; | ||
| 2103 | |||
| 2165 | if (lock) { | 2104 | if (lock) { |
| 2166 | struct nfs4_stateowner *sop = stp->st_stateowner; | ||
| 2167 | clientid_t *lockclid = &lock->v.new.clientid; | 2105 | clientid_t *lockclid = &lock->v.new.clientid; |
| 2168 | struct nfs4_client *clp = sop->so_client; | 2106 | struct nfs4_client *clp = sop->so_client; |
| 2169 | int lkflg = 0; | 2107 | int lkflg = 0; |
| @@ -2193,9 +2131,6 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei | |||
| 2193 | return nfserr_bad_stateid; | 2131 | return nfserr_bad_stateid; |
| 2194 | } | 2132 | } |
| 2195 | 2133 | ||
| 2196 | *stpp = stp; | ||
| 2197 | *sopp = sop = stp->st_stateowner; | ||
| 2198 | |||
| 2199 | /* | 2134 | /* |
| 2200 | * We now validate the seqid and stateid generation numbers. | 2135 | * We now validate the seqid and stateid generation numbers. |
| 2201 | * For the moment, we ignore the possibility of | 2136 | * For the moment, we ignore the possibility of |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 57333944af7f..b0592e7c378d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -148,12 +148,12 @@ xdr_error: \ | |||
| 148 | } \ | 148 | } \ |
| 149 | } while (0) | 149 | } while (0) |
| 150 | 150 | ||
| 151 | static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) | 151 | static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) |
| 152 | { | 152 | { |
| 153 | /* We want more bytes than seem to be available. | 153 | /* We want more bytes than seem to be available. |
| 154 | * Maybe we need a new page, maybe we have just run out | 154 | * Maybe we need a new page, maybe we have just run out |
| 155 | */ | 155 | */ |
| 156 | int avail = (char*)argp->end - (char*)argp->p; | 156 | unsigned int avail = (char *)argp->end - (char *)argp->p; |
| 157 | __be32 *p; | 157 | __be32 *p; |
| 158 | if (avail + argp->pagelen < nbytes) | 158 | if (avail + argp->pagelen < nbytes) |
| 159 | return NULL; | 159 | return NULL; |
| @@ -169,6 +169,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) | |||
| 169 | return NULL; | 169 | return NULL; |
| 170 | 170 | ||
| 171 | } | 171 | } |
| 172 | /* | ||
| 173 | * The following memcpy is safe because read_buf is always | ||
| 174 | * called with nbytes > avail, and the two cases above both | ||
| 175 | * guarantee p points to at least nbytes bytes. | ||
| 176 | */ | ||
| 172 | memcpy(p, argp->p, avail); | 177 | memcpy(p, argp->p, avail); |
| 173 | /* step to next page */ | 178 | /* step to next page */ |
| 174 | argp->p = page_address(argp->pagelist[0]); | 179 | argp->p = page_address(argp->pagelist[0]); |
| @@ -1448,7 +1453,7 @@ static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) | |||
| 1448 | __be32 | 1453 | __be32 |
| 1449 | nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | 1454 | nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, |
| 1450 | struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, | 1455 | struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, |
| 1451 | struct svc_rqst *rqstp) | 1456 | struct svc_rqst *rqstp, int ignore_crossmnt) |
| 1452 | { | 1457 | { |
| 1453 | u32 bmval0 = bmval[0]; | 1458 | u32 bmval0 = bmval[0]; |
| 1454 | u32 bmval1 = bmval[1]; | 1459 | u32 bmval1 = bmval[1]; |
| @@ -1828,7 +1833,12 @@ out_acl: | |||
| 1828 | if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { | 1833 | if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { |
| 1829 | if ((buflen -= 8) < 0) | 1834 | if ((buflen -= 8) < 0) |
| 1830 | goto out_resource; | 1835 | goto out_resource; |
| 1831 | if (exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) { | 1836 | /* |
| 1837 | * Get parent's attributes if not ignoring crossmount | ||
| 1838 | * and this is the root of a cross-mounted filesystem. | ||
| 1839 | */ | ||
| 1840 | if (ignore_crossmnt == 0 && | ||
| 1841 | exp->ex_mnt->mnt_root->d_inode == dentry->d_inode) { | ||
| 1832 | err = vfs_getattr(exp->ex_mnt->mnt_parent, | 1842 | err = vfs_getattr(exp->ex_mnt->mnt_parent, |
| 1833 | exp->ex_mnt->mnt_mountpoint, &stat); | 1843 | exp->ex_mnt->mnt_mountpoint, &stat); |
| 1834 | if (err) | 1844 | if (err) |
| @@ -1864,13 +1874,25 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | |||
| 1864 | struct svc_export *exp = cd->rd_fhp->fh_export; | 1874 | struct svc_export *exp = cd->rd_fhp->fh_export; |
| 1865 | struct dentry *dentry; | 1875 | struct dentry *dentry; |
| 1866 | __be32 nfserr; | 1876 | __be32 nfserr; |
| 1877 | int ignore_crossmnt = 0; | ||
| 1867 | 1878 | ||
| 1868 | dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); | 1879 | dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); |
| 1869 | if (IS_ERR(dentry)) | 1880 | if (IS_ERR(dentry)) |
| 1870 | return nfserrno(PTR_ERR(dentry)); | 1881 | return nfserrno(PTR_ERR(dentry)); |
| 1871 | 1882 | ||
| 1872 | exp_get(exp); | 1883 | exp_get(exp); |
| 1873 | if (d_mountpoint(dentry)) { | 1884 | /* |
| 1885 | * In the case of a mountpoint, the client may be asking for | ||
| 1886 | * attributes that are only properties of the underlying filesystem | ||
| 1887 | * as opposed to the cross-mounted file system. In such a case, | ||
| 1888 | * we will not follow the cross mount and will fill the attribtutes | ||
| 1889 | * directly from the mountpoint dentry. | ||
| 1890 | */ | ||
| 1891 | if (d_mountpoint(dentry) && | ||
| 1892 | (cd->rd_bmval[0] & ~FATTR4_WORD0_RDATTR_ERROR) == 0 && | ||
| 1893 | (cd->rd_bmval[1] & ~FATTR4_WORD1_MOUNTED_ON_FILEID) == 0) | ||
| 1894 | ignore_crossmnt = 1; | ||
| 1895 | else if (d_mountpoint(dentry)) { | ||
| 1874 | int err; | 1896 | int err; |
| 1875 | 1897 | ||
| 1876 | /* | 1898 | /* |
| @@ -1889,7 +1911,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, | |||
| 1889 | 1911 | ||
| 1890 | } | 1912 | } |
| 1891 | nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, | 1913 | nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, |
| 1892 | cd->rd_rqstp); | 1914 | cd->rd_rqstp, ignore_crossmnt); |
| 1893 | out_put: | 1915 | out_put: |
| 1894 | dput(dentry); | 1916 | dput(dentry); |
| 1895 | exp_put(exp); | 1917 | exp_put(exp); |
| @@ -2043,7 +2065,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 | |||
| 2043 | buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); | 2065 | buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); |
| 2044 | nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, | 2066 | nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, |
| 2045 | resp->p, &buflen, getattr->ga_bmval, | 2067 | resp->p, &buflen, getattr->ga_bmval, |
| 2046 | resp->rqstp); | 2068 | resp->rqstp, 0); |
| 2047 | if (!nfserr) | 2069 | if (!nfserr) |
| 2048 | resp->p += buflen; | 2070 | resp->p += buflen; |
| 2049 | return nfserr; | 2071 | return nfserr; |
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 578f2c9d56be..5bfc2ac60d54 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c | |||
| @@ -44,17 +44,17 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); | |||
| 44 | */ | 44 | */ |
| 45 | static DEFINE_SPINLOCK(cache_lock); | 45 | static DEFINE_SPINLOCK(cache_lock); |
| 46 | 46 | ||
| 47 | void | 47 | int nfsd_reply_cache_init(void) |
| 48 | nfsd_cache_init(void) | ||
| 49 | { | 48 | { |
| 50 | struct svc_cacherep *rp; | 49 | struct svc_cacherep *rp; |
| 51 | int i; | 50 | int i; |
| 52 | 51 | ||
| 53 | INIT_LIST_HEAD(&lru_head); | 52 | INIT_LIST_HEAD(&lru_head); |
| 54 | i = CACHESIZE; | 53 | i = CACHESIZE; |
| 55 | while(i) { | 54 | while (i) { |
| 56 | rp = kmalloc(sizeof(*rp), GFP_KERNEL); | 55 | rp = kmalloc(sizeof(*rp), GFP_KERNEL); |
| 57 | if (!rp) break; | 56 | if (!rp) |
| 57 | goto out_nomem; | ||
| 58 | list_add(&rp->c_lru, &lru_head); | 58 | list_add(&rp->c_lru, &lru_head); |
| 59 | rp->c_state = RC_UNUSED; | 59 | rp->c_state = RC_UNUSED; |
| 60 | rp->c_type = RC_NOCACHE; | 60 | rp->c_type = RC_NOCACHE; |
| @@ -62,23 +62,19 @@ nfsd_cache_init(void) | |||
| 62 | i--; | 62 | i--; |
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | if (i) | ||
| 66 | printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n", | ||
| 67 | CACHESIZE, CACHESIZE-i); | ||
| 68 | |||
| 69 | hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); | 65 | hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); |
| 70 | if (!hash_list) { | 66 | if (!hash_list) |
| 71 | nfsd_cache_shutdown(); | 67 | goto out_nomem; |
| 72 | printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n", | ||
| 73 | HASHSIZE * sizeof(struct hlist_head)); | ||
| 74 | return; | ||
| 75 | } | ||
| 76 | 68 | ||
| 77 | cache_disabled = 0; | 69 | cache_disabled = 0; |
| 70 | return 0; | ||
| 71 | out_nomem: | ||
| 72 | printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); | ||
| 73 | nfsd_reply_cache_shutdown(); | ||
| 74 | return -ENOMEM; | ||
| 78 | } | 75 | } |
| 79 | 76 | ||
| 80 | void | 77 | void nfsd_reply_cache_shutdown(void) |
| 81 | nfsd_cache_shutdown(void) | ||
| 82 | { | 78 | { |
| 83 | struct svc_cacherep *rp; | 79 | struct svc_cacherep *rp; |
| 84 | 80 | ||
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 77dc9893b7ba..8516137cdbb0 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
| @@ -304,6 +304,9 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) | |||
| 304 | struct auth_domain *dom; | 304 | struct auth_domain *dom; |
| 305 | struct knfsd_fh fh; | 305 | struct knfsd_fh fh; |
| 306 | 306 | ||
| 307 | if (size == 0) | ||
| 308 | return -EINVAL; | ||
| 309 | |||
| 307 | if (buf[size-1] != '\n') | 310 | if (buf[size-1] != '\n') |
| 308 | return -EINVAL; | 311 | return -EINVAL; |
| 309 | buf[size-1] = 0; | 312 | buf[size-1] = 0; |
| @@ -503,7 +506,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) | |||
| 503 | int len = 0; | 506 | int len = 0; |
| 504 | lock_kernel(); | 507 | lock_kernel(); |
| 505 | if (nfsd_serv) | 508 | if (nfsd_serv) |
| 506 | len = svc_sock_names(buf, nfsd_serv, NULL); | 509 | len = svc_xprt_names(nfsd_serv, buf, 0); |
| 507 | unlock_kernel(); | 510 | unlock_kernel(); |
| 508 | return len; | 511 | return len; |
| 509 | } | 512 | } |
| @@ -540,7 +543,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) | |||
| 540 | } | 543 | } |
| 541 | return err < 0 ? err : 0; | 544 | return err < 0 ? err : 0; |
| 542 | } | 545 | } |
| 543 | if (buf[0] == '-') { | 546 | if (buf[0] == '-' && isdigit(buf[1])) { |
| 544 | char *toclose = kstrdup(buf+1, GFP_KERNEL); | 547 | char *toclose = kstrdup(buf+1, GFP_KERNEL); |
| 545 | int len = 0; | 548 | int len = 0; |
| 546 | if (!toclose) | 549 | if (!toclose) |
| @@ -554,6 +557,53 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size) | |||
| 554 | kfree(toclose); | 557 | kfree(toclose); |
| 555 | return len; | 558 | return len; |
| 556 | } | 559 | } |
| 560 | /* | ||
| 561 | * Add a transport listener by writing it's transport name | ||
| 562 | */ | ||
| 563 | if (isalpha(buf[0])) { | ||
| 564 | int err; | ||
| 565 | char transport[16]; | ||
| 566 | int port; | ||
| 567 | if (sscanf(buf, "%15s %4d", transport, &port) == 2) { | ||
| 568 | err = nfsd_create_serv(); | ||
| 569 | if (!err) { | ||
| 570 | err = svc_create_xprt(nfsd_serv, | ||
| 571 | transport, port, | ||
| 572 | SVC_SOCK_ANONYMOUS); | ||
| 573 | if (err == -ENOENT) | ||
| 574 | /* Give a reasonable perror msg for | ||
| 575 | * bad transport string */ | ||
| 576 | err = -EPROTONOSUPPORT; | ||
| 577 | } | ||
| 578 | return err < 0 ? err : 0; | ||
| 579 | } | ||
| 580 | } | ||
| 581 | /* | ||
| 582 | * Remove a transport by writing it's transport name and port number | ||
| 583 | */ | ||
| 584 | if (buf[0] == '-' && isalpha(buf[1])) { | ||
| 585 | struct svc_xprt *xprt; | ||
| 586 | int err = -EINVAL; | ||
| 587 | char transport[16]; | ||
| 588 | int port; | ||
| 589 | if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { | ||
| 590 | if (port == 0) | ||
| 591 | return -EINVAL; | ||
| 592 | lock_kernel(); | ||
| 593 | if (nfsd_serv) { | ||
| 594 | xprt = svc_find_xprt(nfsd_serv, transport, | ||
| 595 | AF_UNSPEC, port); | ||
| 596 | if (xprt) { | ||
| 597 | svc_close_xprt(xprt); | ||
| 598 | svc_xprt_put(xprt); | ||
| 599 | err = 0; | ||
| 600 | } else | ||
| 601 | err = -ENOTCONN; | ||
| 602 | } | ||
| 603 | unlock_kernel(); | ||
| 604 | return err < 0 ? err : 0; | ||
| 605 | } | ||
| 606 | } | ||
| 557 | return -EINVAL; | 607 | return -EINVAL; |
| 558 | } | 608 | } |
| 559 | 609 | ||
| @@ -616,7 +666,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) | |||
| 616 | char *recdir; | 666 | char *recdir; |
| 617 | int len, status; | 667 | int len, status; |
| 618 | 668 | ||
| 619 | if (size > PATH_MAX || buf[size-1] != '\n') | 669 | if (size == 0 || size > PATH_MAX || buf[size-1] != '\n') |
| 620 | return -EINVAL; | 670 | return -EINVAL; |
| 621 | buf[size-1] = 0; | 671 | buf[size-1] = 0; |
| 622 | 672 | ||
| @@ -674,6 +724,27 @@ static struct file_system_type nfsd_fs_type = { | |||
| 674 | .kill_sb = kill_litter_super, | 724 | .kill_sb = kill_litter_super, |
| 675 | }; | 725 | }; |
| 676 | 726 | ||
| 727 | #ifdef CONFIG_PROC_FS | ||
| 728 | static int create_proc_exports_entry(void) | ||
| 729 | { | ||
| 730 | struct proc_dir_entry *entry; | ||
| 731 | |||
| 732 | entry = proc_mkdir("fs/nfs", NULL); | ||
| 733 | if (!entry) | ||
| 734 | return -ENOMEM; | ||
| 735 | entry = create_proc_entry("fs/nfs/exports", 0, NULL); | ||
| 736 | if (!entry) | ||
| 737 | return -ENOMEM; | ||
| 738 | entry->proc_fops = &exports_operations; | ||
| 739 | return 0; | ||
| 740 | } | ||
| 741 | #else /* CONFIG_PROC_FS */ | ||
| 742 | static int create_proc_exports_entry(void) | ||
| 743 | { | ||
| 744 | return 0; | ||
| 745 | } | ||
| 746 | #endif | ||
| 747 | |||
| 677 | static int __init init_nfsd(void) | 748 | static int __init init_nfsd(void) |
| 678 | { | 749 | { |
| 679 | int retval; | 750 | int retval; |
| @@ -683,32 +754,43 @@ static int __init init_nfsd(void) | |||
| 683 | if (retval) | 754 | if (retval) |
| 684 | return retval; | 755 | return retval; |
| 685 | nfsd_stat_init(); /* Statistics */ | 756 | nfsd_stat_init(); /* Statistics */ |
| 686 | nfsd_cache_init(); /* RPC reply cache */ | 757 | retval = nfsd_reply_cache_init(); |
| 687 | nfsd_export_init(); /* Exports table */ | 758 | if (retval) |
| 759 | goto out_free_stat; | ||
| 760 | retval = nfsd_export_init(); | ||
| 761 | if (retval) | ||
| 762 | goto out_free_cache; | ||
| 688 | nfsd_lockd_init(); /* lockd->nfsd callbacks */ | 763 | nfsd_lockd_init(); /* lockd->nfsd callbacks */ |
| 689 | nfsd_idmap_init(); /* Name to ID mapping */ | 764 | retval = nfsd_idmap_init(); |
| 690 | if (proc_mkdir("fs/nfs", NULL)) { | 765 | if (retval) |
| 691 | struct proc_dir_entry *entry; | 766 | goto out_free_lockd; |
| 692 | entry = create_proc_entry("fs/nfs/exports", 0, NULL); | 767 | retval = create_proc_exports_entry(); |
| 693 | if (entry) | 768 | if (retval) |
| 694 | entry->proc_fops = &exports_operations; | 769 | goto out_free_idmap; |
| 695 | } | ||
| 696 | retval = register_filesystem(&nfsd_fs_type); | 770 | retval = register_filesystem(&nfsd_fs_type); |
| 697 | if (retval) { | 771 | if (retval) |
| 698 | nfsd_export_shutdown(); | 772 | goto out_free_all; |
| 699 | nfsd_cache_shutdown(); | 773 | return 0; |
| 700 | remove_proc_entry("fs/nfs/exports", NULL); | 774 | out_free_all: |
| 701 | remove_proc_entry("fs/nfs", NULL); | 775 | remove_proc_entry("fs/nfs/exports", NULL); |
| 702 | nfsd_stat_shutdown(); | 776 | remove_proc_entry("fs/nfs", NULL); |
| 703 | nfsd_lockd_shutdown(); | 777 | out_free_idmap: |
| 704 | } | 778 | nfsd_idmap_shutdown(); |
| 779 | out_free_lockd: | ||
| 780 | nfsd_lockd_shutdown(); | ||
| 781 | nfsd_export_shutdown(); | ||
| 782 | out_free_cache: | ||
| 783 | nfsd_reply_cache_shutdown(); | ||
| 784 | out_free_stat: | ||
| 785 | nfsd_stat_shutdown(); | ||
| 786 | nfsd4_free_slabs(); | ||
| 705 | return retval; | 787 | return retval; |
| 706 | } | 788 | } |
| 707 | 789 | ||
| 708 | static void __exit exit_nfsd(void) | 790 | static void __exit exit_nfsd(void) |
| 709 | { | 791 | { |
| 710 | nfsd_export_shutdown(); | 792 | nfsd_export_shutdown(); |
| 711 | nfsd_cache_shutdown(); | 793 | nfsd_reply_cache_shutdown(); |
| 712 | remove_proc_entry("fs/nfs/exports", NULL); | 794 | remove_proc_entry("fs/nfs/exports", NULL); |
| 713 | remove_proc_entry("fs/nfs", NULL); | 795 | remove_proc_entry("fs/nfs", NULL); |
| 714 | nfsd_stat_shutdown(); | 796 | nfsd_stat_shutdown(); |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 468f17a78441..8fbd2dc08a92 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/sunrpc/svc.h> | 22 | #include <linux/sunrpc/svc.h> |
| 23 | #include <linux/sunrpc/svcauth_gss.h> | 23 | #include <linux/sunrpc/svcauth_gss.h> |
| 24 | #include <linux/nfsd/nfsd.h> | 24 | #include <linux/nfsd/nfsd.h> |
| 25 | #include "auth.h" | ||
| 25 | 26 | ||
| 26 | #define NFSDDBG_FACILITY NFSDDBG_FH | 27 | #define NFSDDBG_FACILITY NFSDDBG_FH |
| 27 | 28 | ||
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1190aeaa92be..9647b0f7bc0c 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
| @@ -155,8 +155,8 @@ static int killsig; /* signal that was used to kill last nfsd */ | |||
| 155 | static void nfsd_last_thread(struct svc_serv *serv) | 155 | static void nfsd_last_thread(struct svc_serv *serv) |
| 156 | { | 156 | { |
| 157 | /* When last nfsd thread exits we need to do some clean-up */ | 157 | /* When last nfsd thread exits we need to do some clean-up */ |
| 158 | struct svc_sock *svsk; | 158 | struct svc_xprt *xprt; |
| 159 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) | 159 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) |
| 160 | lockd_down(); | 160 | lockd_down(); |
| 161 | nfsd_serv = NULL; | 161 | nfsd_serv = NULL; |
| 162 | nfsd_racache_shutdown(); | 162 | nfsd_racache_shutdown(); |
| @@ -236,7 +236,7 @@ static int nfsd_init_socks(int port) | |||
| 236 | 236 | ||
| 237 | error = lockd_up(IPPROTO_UDP); | 237 | error = lockd_up(IPPROTO_UDP); |
| 238 | if (error >= 0) { | 238 | if (error >= 0) { |
| 239 | error = svc_makesock(nfsd_serv, IPPROTO_UDP, port, | 239 | error = svc_create_xprt(nfsd_serv, "udp", port, |
| 240 | SVC_SOCK_DEFAULTS); | 240 | SVC_SOCK_DEFAULTS); |
| 241 | if (error < 0) | 241 | if (error < 0) |
| 242 | lockd_down(); | 242 | lockd_down(); |
| @@ -247,7 +247,7 @@ static int nfsd_init_socks(int port) | |||
| 247 | #ifdef CONFIG_NFSD_TCP | 247 | #ifdef CONFIG_NFSD_TCP |
| 248 | error = lockd_up(IPPROTO_TCP); | 248 | error = lockd_up(IPPROTO_TCP); |
| 249 | if (error >= 0) { | 249 | if (error >= 0) { |
| 250 | error = svc_makesock(nfsd_serv, IPPROTO_TCP, port, | 250 | error = svc_create_xprt(nfsd_serv, "tcp", port, |
| 251 | SVC_SOCK_DEFAULTS); | 251 | SVC_SOCK_DEFAULTS); |
| 252 | if (error < 0) | 252 | if (error < 0) |
| 253 | lockd_down(); | 253 | lockd_down(); |
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index b86e3658a0af..61ad61743d94 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <linux/nfsd/nfsd.h> | 15 | #include <linux/nfsd/nfsd.h> |
| 16 | #include <linux/nfsd/xdr.h> | 16 | #include <linux/nfsd/xdr.h> |
| 17 | #include <linux/mm.h> | 17 | #include <linux/mm.h> |
| 18 | #include "auth.h" | ||
| 18 | 19 | ||
| 19 | #define NFSDDBG_FACILITY NFSDDBG_XDR | 20 | #define NFSDDBG_FACILITY NFSDDBG_XDR |
| 20 | 21 | ||
| @@ -62,10 +63,10 @@ encode_fh(__be32 *p, struct svc_fh *fhp) | |||
| 62 | * no slashes or null bytes. | 63 | * no slashes or null bytes. |
| 63 | */ | 64 | */ |
| 64 | static __be32 * | 65 | static __be32 * |
| 65 | decode_filename(__be32 *p, char **namp, int *lenp) | 66 | decode_filename(__be32 *p, char **namp, unsigned int *lenp) |
| 66 | { | 67 | { |
| 67 | char *name; | 68 | char *name; |
| 68 | int i; | 69 | unsigned int i; |
| 69 | 70 | ||
| 70 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { | 71 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { |
| 71 | for (i = 0, name = *namp; i < *lenp; i++, name++) { | 72 | for (i = 0, name = *namp; i < *lenp; i++, name++) { |
| @@ -78,10 +79,10 @@ decode_filename(__be32 *p, char **namp, int *lenp) | |||
| 78 | } | 79 | } |
| 79 | 80 | ||
| 80 | static __be32 * | 81 | static __be32 * |
| 81 | decode_pathname(__be32 *p, char **namp, int *lenp) | 82 | decode_pathname(__be32 *p, char **namp, unsigned int *lenp) |
| 82 | { | 83 | { |
| 83 | char *name; | 84 | char *name; |
| 84 | int i; | 85 | unsigned int i; |
| 85 | 86 | ||
| 86 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { | 87 | if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { |
| 87 | for (i = 0, name = *namp; i < *lenp; i++, name++) { | 88 | for (i = 0, name = *namp; i < *lenp; i++, name++) { |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d0199189924c..cc75e4fcd02b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
| @@ -132,7 +132,7 @@ out: | |||
| 132 | 132 | ||
| 133 | __be32 | 133 | __be32 |
| 134 | nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, | 134 | nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, |
| 135 | const char *name, int len, | 135 | const char *name, unsigned int len, |
| 136 | struct svc_export **exp_ret, struct dentry **dentry_ret) | 136 | struct svc_export **exp_ret, struct dentry **dentry_ret) |
| 137 | { | 137 | { |
| 138 | struct svc_export *exp; | 138 | struct svc_export *exp; |
| @@ -226,7 +226,7 @@ out_nfserr: | |||
| 226 | */ | 226 | */ |
| 227 | __be32 | 227 | __be32 |
| 228 | nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, | 228 | nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, |
| 229 | int len, struct svc_fh *resfh) | 229 | unsigned int len, struct svc_fh *resfh) |
| 230 | { | 230 | { |
| 231 | struct svc_export *exp; | 231 | struct svc_export *exp; |
| 232 | struct dentry *dentry; | 232 | struct dentry *dentry; |
| @@ -1151,6 +1151,26 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
| 1151 | } | 1151 | } |
| 1152 | #endif /* CONFIG_NFSD_V3 */ | 1152 | #endif /* CONFIG_NFSD_V3 */ |
| 1153 | 1153 | ||
| 1154 | __be32 | ||
| 1155 | nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, | ||
| 1156 | struct iattr *iap) | ||
| 1157 | { | ||
| 1158 | /* | ||
| 1159 | * Mode has already been set earlier in create: | ||
| 1160 | */ | ||
| 1161 | iap->ia_valid &= ~ATTR_MODE; | ||
| 1162 | /* | ||
| 1163 | * Setting uid/gid works only for root. Irix appears to | ||
| 1164 | * send along the gid on create when it tries to implement | ||
| 1165 | * setgid directories via NFS: | ||
| 1166 | */ | ||
| 1167 | if (current->fsuid != 0) | ||
| 1168 | iap->ia_valid &= ~(ATTR_UID|ATTR_GID); | ||
| 1169 | if (iap->ia_valid) | ||
| 1170 | return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); | ||
| 1171 | return 0; | ||
| 1172 | } | ||
| 1173 | |||
| 1154 | /* | 1174 | /* |
| 1155 | * Create a file (regular, directory, device, fifo); UNIX sockets | 1175 | * Create a file (regular, directory, device, fifo); UNIX sockets |
| 1156 | * not yet implemented. | 1176 | * not yet implemented. |
| @@ -1167,6 +1187,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
| 1167 | struct dentry *dentry, *dchild = NULL; | 1187 | struct dentry *dentry, *dchild = NULL; |
| 1168 | struct inode *dirp; | 1188 | struct inode *dirp; |
| 1169 | __be32 err; | 1189 | __be32 err; |
| 1190 | __be32 err2; | ||
| 1170 | int host_err; | 1191 | int host_err; |
| 1171 | 1192 | ||
| 1172 | err = nfserr_perm; | 1193 | err = nfserr_perm; |
| @@ -1257,16 +1278,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
| 1257 | } | 1278 | } |
| 1258 | 1279 | ||
| 1259 | 1280 | ||
| 1260 | /* Set file attributes. Mode has already been set and | 1281 | err2 = nfsd_create_setattr(rqstp, resfhp, iap); |
| 1261 | * setting uid/gid works only for root. Irix appears to | 1282 | if (err2) |
| 1262 | * send along the gid when it tries to implement setgid | 1283 | err = err2; |
| 1263 | * directories via NFS. | ||
| 1264 | */ | ||
| 1265 | if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { | ||
| 1266 | __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); | ||
| 1267 | if (err2) | ||
| 1268 | err = err2; | ||
| 1269 | } | ||
| 1270 | /* | 1284 | /* |
| 1271 | * Update the file handle to get the new inode info. | 1285 | * Update the file handle to get the new inode info. |
| 1272 | */ | 1286 | */ |
| @@ -1295,6 +1309,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
| 1295 | struct dentry *dentry, *dchild = NULL; | 1309 | struct dentry *dentry, *dchild = NULL; |
| 1296 | struct inode *dirp; | 1310 | struct inode *dirp; |
| 1297 | __be32 err; | 1311 | __be32 err; |
| 1312 | __be32 err2; | ||
| 1298 | int host_err; | 1313 | int host_err; |
| 1299 | __u32 v_mtime=0, v_atime=0; | 1314 | __u32 v_mtime=0, v_atime=0; |
| 1300 | 1315 | ||
| @@ -1399,16 +1414,10 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, | |||
| 1399 | iap->ia_atime.tv_nsec = 0; | 1414 | iap->ia_atime.tv_nsec = 0; |
| 1400 | } | 1415 | } |
| 1401 | 1416 | ||
| 1402 | /* Set file attributes. | ||
| 1403 | * Irix appears to send along the gid when it tries to | ||
| 1404 | * implement setgid directories via NFS. Clear out all that cruft. | ||
| 1405 | */ | ||
| 1406 | set_attr: | 1417 | set_attr: |
| 1407 | if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { | 1418 | err2 = nfsd_create_setattr(rqstp, resfhp, iap); |
| 1408 | __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); | 1419 | if (err2) |
| 1409 | if (err2) | 1420 | err = err2; |
| 1410 | err = err2; | ||
| 1411 | } | ||
| 1412 | 1421 | ||
| 1413 | /* | 1422 | /* |
| 1414 | * Update the filehandle to get the new inode info. | 1423 | * Update the filehandle to get the new inode info. |
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index e2d1ce36b367..4babb2a129ac 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h | |||
| @@ -173,14 +173,17 @@ void nlmclnt_next_cookie(struct nlm_cookie *); | |||
| 173 | /* | 173 | /* |
| 174 | * Host cache | 174 | * Host cache |
| 175 | */ | 175 | */ |
| 176 | struct nlm_host * nlmclnt_lookup_host(const struct sockaddr_in *, int, int, const char *, int); | 176 | struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *, int, int, |
| 177 | struct nlm_host * nlmsvc_lookup_host(struct svc_rqst *, const char *, int); | 177 | const char *, unsigned int); |
| 178 | struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *, | ||
| 179 | unsigned int); | ||
| 178 | struct rpc_clnt * nlm_bind_host(struct nlm_host *); | 180 | struct rpc_clnt * nlm_bind_host(struct nlm_host *); |
| 179 | void nlm_rebind_host(struct nlm_host *); | 181 | void nlm_rebind_host(struct nlm_host *); |
| 180 | struct nlm_host * nlm_get_host(struct nlm_host *); | 182 | struct nlm_host * nlm_get_host(struct nlm_host *); |
| 181 | void nlm_release_host(struct nlm_host *); | 183 | void nlm_release_host(struct nlm_host *); |
| 182 | void nlm_shutdown_hosts(void); | 184 | void nlm_shutdown_hosts(void); |
| 183 | extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, int, u32); | 185 | extern void nlm_host_rebooted(const struct sockaddr_in *, const char *, |
| 186 | unsigned int, u32); | ||
| 184 | void nsm_release(struct nsm_handle *); | 187 | void nsm_release(struct nsm_handle *); |
| 185 | 188 | ||
| 186 | 189 | ||
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 83a1f9f6237b..df18fa053bcd 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h | |||
| @@ -29,7 +29,7 @@ struct svc_rqst; | |||
| 29 | /* Lock info passed via NLM */ | 29 | /* Lock info passed via NLM */ |
| 30 | struct nlm_lock { | 30 | struct nlm_lock { |
| 31 | char * caller; | 31 | char * caller; |
| 32 | int len; /* length of "caller" */ | 32 | unsigned int len; /* length of "caller" */ |
| 33 | struct nfs_fh fh; | 33 | struct nfs_fh fh; |
| 34 | struct xdr_netobj oh; | 34 | struct xdr_netobj oh; |
| 35 | u32 svid; | 35 | u32 svid; |
| @@ -78,7 +78,7 @@ struct nlm_res { | |||
| 78 | */ | 78 | */ |
| 79 | struct nlm_reboot { | 79 | struct nlm_reboot { |
| 80 | char * mon; | 80 | char * mon; |
| 81 | int len; | 81 | unsigned int len; |
| 82 | u32 state; | 82 | u32 state; |
| 83 | __be32 addr; | 83 | __be32 addr; |
| 84 | __be32 vers; | 84 | __be32 vers; |
diff --git a/include/linux/nfsd/Kbuild b/include/linux/nfsd/Kbuild index d9c5455808e5..e726fc3a4375 100644 --- a/include/linux/nfsd/Kbuild +++ b/include/linux/nfsd/Kbuild | |||
| @@ -4,4 +4,3 @@ unifdef-y += stats.h | |||
| 4 | unifdef-y += syscall.h | 4 | unifdef-y += syscall.h |
| 5 | unifdef-y += nfsfh.h | 5 | unifdef-y += nfsfh.h |
| 6 | unifdef-y += debug.h | 6 | unifdef-y += debug.h |
| 7 | unifdef-y += auth.h | ||
diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index 007480cd6a60..7b5d784cc858 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h | |||
| @@ -72,8 +72,8 @@ enum { | |||
| 72 | */ | 72 | */ |
| 73 | #define RC_DELAY (HZ/5) | 73 | #define RC_DELAY (HZ/5) |
| 74 | 74 | ||
| 75 | void nfsd_cache_init(void); | 75 | int nfsd_reply_cache_init(void); |
| 76 | void nfsd_cache_shutdown(void); | 76 | void nfsd_reply_cache_shutdown(void); |
| 77 | int nfsd_cache_lookup(struct svc_rqst *, int); | 77 | int nfsd_cache_lookup(struct svc_rqst *, int); |
| 78 | void nfsd_cache_update(struct svc_rqst *, int, __be32 *); | 78 | void nfsd_cache_update(struct svc_rqst *, int, __be32 *); |
| 79 | 79 | ||
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index bcb7abafbca9..3a1687251367 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h | |||
| @@ -122,7 +122,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp); | |||
| 122 | /* | 122 | /* |
| 123 | * Function declarations | 123 | * Function declarations |
| 124 | */ | 124 | */ |
| 125 | void nfsd_export_init(void); | 125 | int nfsd_export_init(void); |
| 126 | void nfsd_export_shutdown(void); | 126 | void nfsd_export_shutdown(void); |
| 127 | void nfsd_export_flush(void); | 127 | void nfsd_export_flush(void); |
| 128 | void exp_readlock(void); | 128 | void exp_readlock(void); |
diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 604a0d786bc6..8caf4c4f64e6 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h | |||
| @@ -20,7 +20,6 @@ | |||
| 20 | #include <linux/nfsd/debug.h> | 20 | #include <linux/nfsd/debug.h> |
| 21 | #include <linux/nfsd/nfsfh.h> | 21 | #include <linux/nfsd/nfsfh.h> |
| 22 | #include <linux/nfsd/export.h> | 22 | #include <linux/nfsd/export.h> |
| 23 | #include <linux/nfsd/auth.h> | ||
| 24 | #include <linux/nfsd/stats.h> | 23 | #include <linux/nfsd/stats.h> |
| 25 | /* | 24 | /* |
| 26 | * nfsd version | 25 | * nfsd version |
| @@ -70,9 +69,9 @@ void nfsd_racache_shutdown(void); | |||
| 70 | int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, | 69 | int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, |
| 71 | struct svc_export **expp); | 70 | struct svc_export **expp); |
| 72 | __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, | 71 | __be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, |
| 73 | const char *, int, struct svc_fh *); | 72 | const char *, unsigned int, struct svc_fh *); |
| 74 | __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, | 73 | __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *, |
| 75 | const char *, int, | 74 | const char *, unsigned int, |
| 76 | struct svc_export **, struct dentry **); | 75 | struct svc_export **, struct dentry **); |
| 77 | __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, | 76 | __be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, |
| 78 | struct iattr *, int, time_t); | 77 | struct iattr *, int, time_t); |
diff --git a/include/linux/nfsd/syscall.h b/include/linux/nfsd/syscall.h index 8bcddccb6c42..4e439765b705 100644 --- a/include/linux/nfsd/syscall.h +++ b/include/linux/nfsd/syscall.h | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include <linux/nfsd/const.h> | 18 | #include <linux/nfsd/const.h> |
| 19 | #include <linux/nfsd/export.h> | 19 | #include <linux/nfsd/export.h> |
| 20 | #include <linux/nfsd/nfsfh.h> | 20 | #include <linux/nfsd/nfsfh.h> |
| 21 | #include <linux/nfsd/auth.h> | ||
| 22 | 21 | ||
| 23 | /* | 22 | /* |
| 24 | * Version of the syscall interface | 23 | * Version of the syscall interface |
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h index 67885d5e6e50..a0132ef58f21 100644 --- a/include/linux/nfsd/xdr.h +++ b/include/linux/nfsd/xdr.h | |||
| @@ -23,7 +23,7 @@ struct nfsd_sattrargs { | |||
| 23 | struct nfsd_diropargs { | 23 | struct nfsd_diropargs { |
| 24 | struct svc_fh fh; | 24 | struct svc_fh fh; |
| 25 | char * name; | 25 | char * name; |
| 26 | int len; | 26 | unsigned int len; |
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | struct nfsd_readargs { | 29 | struct nfsd_readargs { |
| @@ -43,17 +43,17 @@ struct nfsd_writeargs { | |||
| 43 | struct nfsd_createargs { | 43 | struct nfsd_createargs { |
| 44 | struct svc_fh fh; | 44 | struct svc_fh fh; |
| 45 | char * name; | 45 | char * name; |
| 46 | int len; | 46 | unsigned int len; |
| 47 | struct iattr attrs; | 47 | struct iattr attrs; |
| 48 | }; | 48 | }; |
| 49 | 49 | ||
| 50 | struct nfsd_renameargs { | 50 | struct nfsd_renameargs { |
| 51 | struct svc_fh ffh; | 51 | struct svc_fh ffh; |
| 52 | char * fname; | 52 | char * fname; |
| 53 | int flen; | 53 | unsigned int flen; |
| 54 | struct svc_fh tfh; | 54 | struct svc_fh tfh; |
| 55 | char * tname; | 55 | char * tname; |
| 56 | int tlen; | 56 | unsigned int tlen; |
| 57 | }; | 57 | }; |
| 58 | 58 | ||
| 59 | struct nfsd_readlinkargs { | 59 | struct nfsd_readlinkargs { |
| @@ -65,15 +65,15 @@ struct nfsd_linkargs { | |||
| 65 | struct svc_fh ffh; | 65 | struct svc_fh ffh; |
| 66 | struct svc_fh tfh; | 66 | struct svc_fh tfh; |
| 67 | char * tname; | 67 | char * tname; |
| 68 | int tlen; | 68 | unsigned int tlen; |
| 69 | }; | 69 | }; |
| 70 | 70 | ||
| 71 | struct nfsd_symlinkargs { | 71 | struct nfsd_symlinkargs { |
| 72 | struct svc_fh ffh; | 72 | struct svc_fh ffh; |
| 73 | char * fname; | 73 | char * fname; |
| 74 | int flen; | 74 | unsigned int flen; |
| 75 | char * tname; | 75 | char * tname; |
| 76 | int tlen; | 76 | unsigned int tlen; |
| 77 | struct iattr attrs; | 77 | struct iattr attrs; |
| 78 | }; | 78 | }; |
| 79 | 79 | ||
diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h index 89d9d6061a62..421eddd65a25 100644 --- a/include/linux/nfsd/xdr3.h +++ b/include/linux/nfsd/xdr3.h | |||
| @@ -21,7 +21,7 @@ struct nfsd3_sattrargs { | |||
| 21 | struct nfsd3_diropargs { | 21 | struct nfsd3_diropargs { |
| 22 | struct svc_fh fh; | 22 | struct svc_fh fh; |
| 23 | char * name; | 23 | char * name; |
| 24 | int len; | 24 | unsigned int len; |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
| 27 | struct nfsd3_accessargs { | 27 | struct nfsd3_accessargs { |
| @@ -48,7 +48,7 @@ struct nfsd3_writeargs { | |||
| 48 | struct nfsd3_createargs { | 48 | struct nfsd3_createargs { |
| 49 | struct svc_fh fh; | 49 | struct svc_fh fh; |
| 50 | char * name; | 50 | char * name; |
| 51 | int len; | 51 | unsigned int len; |
| 52 | int createmode; | 52 | int createmode; |
| 53 | struct iattr attrs; | 53 | struct iattr attrs; |
| 54 | __be32 * verf; | 54 | __be32 * verf; |
| @@ -57,7 +57,7 @@ struct nfsd3_createargs { | |||
| 57 | struct nfsd3_mknodargs { | 57 | struct nfsd3_mknodargs { |
| 58 | struct svc_fh fh; | 58 | struct svc_fh fh; |
| 59 | char * name; | 59 | char * name; |
| 60 | int len; | 60 | unsigned int len; |
| 61 | __u32 ftype; | 61 | __u32 ftype; |
| 62 | __u32 major, minor; | 62 | __u32 major, minor; |
| 63 | struct iattr attrs; | 63 | struct iattr attrs; |
| @@ -66,10 +66,10 @@ struct nfsd3_mknodargs { | |||
| 66 | struct nfsd3_renameargs { | 66 | struct nfsd3_renameargs { |
| 67 | struct svc_fh ffh; | 67 | struct svc_fh ffh; |
| 68 | char * fname; | 68 | char * fname; |
| 69 | int flen; | 69 | unsigned int flen; |
| 70 | struct svc_fh tfh; | 70 | struct svc_fh tfh; |
| 71 | char * tname; | 71 | char * tname; |
| 72 | int tlen; | 72 | unsigned int tlen; |
| 73 | }; | 73 | }; |
| 74 | 74 | ||
| 75 | struct nfsd3_readlinkargs { | 75 | struct nfsd3_readlinkargs { |
| @@ -81,15 +81,15 @@ struct nfsd3_linkargs { | |||
| 81 | struct svc_fh ffh; | 81 | struct svc_fh ffh; |
| 82 | struct svc_fh tfh; | 82 | struct svc_fh tfh; |
| 83 | char * tname; | 83 | char * tname; |
| 84 | int tlen; | 84 | unsigned int tlen; |
| 85 | }; | 85 | }; |
| 86 | 86 | ||
| 87 | struct nfsd3_symlinkargs { | 87 | struct nfsd3_symlinkargs { |
| 88 | struct svc_fh ffh; | 88 | struct svc_fh ffh; |
| 89 | char * fname; | 89 | char * fname; |
| 90 | int flen; | 90 | unsigned int flen; |
| 91 | char * tname; | 91 | char * tname; |
| 92 | int tlen; | 92 | unsigned int tlen; |
| 93 | struct iattr attrs; | 93 | struct iattr attrs; |
| 94 | }; | 94 | }; |
| 95 | 95 | ||
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index b0ddfb41c790..27bd3e38ec5a 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h | |||
| @@ -441,7 +441,7 @@ void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); | |||
| 441 | void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); | 441 | void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); |
| 442 | __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | 442 | __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, |
| 443 | struct dentry *dentry, __be32 *buffer, int *countp, | 443 | struct dentry *dentry, __be32 *buffer, int *countp, |
| 444 | u32 *bmval, struct svc_rqst *); | 444 | u32 *bmval, struct svc_rqst *, int ignore_crossmnt); |
| 445 | extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, | 445 | extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, |
| 446 | struct nfsd4_compound_state *, | 446 | struct nfsd4_compound_state *, |
| 447 | struct nfsd4_setclientid *setclid); | 447 | struct nfsd4_setclientid *setclid); |
diff --git a/include/linux/nfsd_idmap.h b/include/linux/nfsd_idmap.h index e82746fcad14..d4a2ac18bd4c 100644 --- a/include/linux/nfsd_idmap.h +++ b/include/linux/nfsd_idmap.h | |||
| @@ -44,11 +44,16 @@ | |||
| 44 | #define IDMAP_NAMESZ 128 | 44 | #define IDMAP_NAMESZ 128 |
| 45 | 45 | ||
| 46 | #ifdef CONFIG_NFSD_V4 | 46 | #ifdef CONFIG_NFSD_V4 |
| 47 | void nfsd_idmap_init(void); | 47 | int nfsd_idmap_init(void); |
| 48 | void nfsd_idmap_shutdown(void); | 48 | void nfsd_idmap_shutdown(void); |
| 49 | #else | 49 | #else |
| 50 | static inline void nfsd_idmap_init(void) {}; | 50 | static inline int nfsd_idmap_init(void) |
| 51 | static inline void nfsd_idmap_shutdown(void) {}; | 51 | { |
| 52 | return 0; | ||
| 53 | } | ||
| 54 | static inline void nfsd_idmap_shutdown(void) | ||
| 55 | { | ||
| 56 | } | ||
| 52 | #endif | 57 | #endif |
| 53 | 58 | ||
| 54 | int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); | 59 | int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); |
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index bd7a6b0a87af..03547d6abee5 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h | |||
| @@ -169,8 +169,8 @@ extern int cache_check(struct cache_detail *detail, | |||
| 169 | extern void cache_flush(void); | 169 | extern void cache_flush(void); |
| 170 | extern void cache_purge(struct cache_detail *detail); | 170 | extern void cache_purge(struct cache_detail *detail); |
| 171 | #define NEVER (0x7FFFFFFF) | 171 | #define NEVER (0x7FFFFFFF) |
| 172 | extern void cache_register(struct cache_detail *cd); | 172 | extern int cache_register(struct cache_detail *cd); |
| 173 | extern int cache_unregister(struct cache_detail *cd); | 173 | extern void cache_unregister(struct cache_detail *cd); |
| 174 | 174 | ||
| 175 | extern void qword_add(char **bpp, int *lp, char *str); | 175 | extern void qword_add(char **bpp, int *lp, char *str); |
| 176 | extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); | 176 | extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); |
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 3912cf16361e..10709cbe96fd 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | #define RPCDBG_BIND 0x0020 | 20 | #define RPCDBG_BIND 0x0020 |
| 21 | #define RPCDBG_SCHED 0x0040 | 21 | #define RPCDBG_SCHED 0x0040 |
| 22 | #define RPCDBG_TRANS 0x0080 | 22 | #define RPCDBG_TRANS 0x0080 |
| 23 | #define RPCDBG_SVCSOCK 0x0100 | 23 | #define RPCDBG_SVCXPRT 0x0100 |
| 24 | #define RPCDBG_SVCDSP 0x0200 | 24 | #define RPCDBG_SVCDSP 0x0200 |
| 25 | #define RPCDBG_MISC 0x0400 | 25 | #define RPCDBG_MISC 0x0400 |
| 26 | #define RPCDBG_CACHE 0x0800 | 26 | #define RPCDBG_CACHE 0x0800 |
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 8531a70da73d..64c771056187 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h | |||
| @@ -204,7 +204,7 @@ union svc_addr_u { | |||
| 204 | struct svc_rqst { | 204 | struct svc_rqst { |
| 205 | struct list_head rq_list; /* idle list */ | 205 | struct list_head rq_list; /* idle list */ |
| 206 | struct list_head rq_all; /* all threads list */ | 206 | struct list_head rq_all; /* all threads list */ |
| 207 | struct svc_sock * rq_sock; /* socket */ | 207 | struct svc_xprt * rq_xprt; /* transport ptr */ |
| 208 | struct sockaddr_storage rq_addr; /* peer address */ | 208 | struct sockaddr_storage rq_addr; /* peer address */ |
| 209 | size_t rq_addrlen; | 209 | size_t rq_addrlen; |
| 210 | 210 | ||
| @@ -214,9 +214,10 @@ struct svc_rqst { | |||
| 214 | struct auth_ops * rq_authop; /* authentication flavour */ | 214 | struct auth_ops * rq_authop; /* authentication flavour */ |
| 215 | u32 rq_flavor; /* pseudoflavor */ | 215 | u32 rq_flavor; /* pseudoflavor */ |
| 216 | struct svc_cred rq_cred; /* auth info */ | 216 | struct svc_cred rq_cred; /* auth info */ |
| 217 | struct sk_buff * rq_skbuff; /* fast recv inet buffer */ | 217 | void * rq_xprt_ctxt; /* transport specific context ptr */ |
| 218 | struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ | 218 | struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ |
| 219 | 219 | ||
| 220 | size_t rq_xprt_hlen; /* xprt header len */ | ||
| 220 | struct xdr_buf rq_arg; | 221 | struct xdr_buf rq_arg; |
| 221 | struct xdr_buf rq_res; | 222 | struct xdr_buf rq_res; |
| 222 | struct page * rq_pages[RPCSVC_MAXPAGES]; | 223 | struct page * rq_pages[RPCSVC_MAXPAGES]; |
| @@ -317,11 +318,12 @@ static inline void svc_free_res_pages(struct svc_rqst *rqstp) | |||
| 317 | 318 | ||
| 318 | struct svc_deferred_req { | 319 | struct svc_deferred_req { |
| 319 | u32 prot; /* protocol (UDP or TCP) */ | 320 | u32 prot; /* protocol (UDP or TCP) */ |
| 320 | struct svc_sock *svsk; | 321 | struct svc_xprt *xprt; |
| 321 | struct sockaddr_storage addr; /* where reply must go */ | 322 | struct sockaddr_storage addr; /* where reply must go */ |
| 322 | size_t addrlen; | 323 | size_t addrlen; |
| 323 | union svc_addr_u daddr; /* where reply must come from */ | 324 | union svc_addr_u daddr; /* where reply must come from */ |
| 324 | struct cache_deferred_req handle; | 325 | struct cache_deferred_req handle; |
| 326 | size_t xprt_hlen; | ||
| 325 | int argslen; | 327 | int argslen; |
| 326 | __be32 args[0]; | 328 | __be32 args[0]; |
| 327 | }; | 329 | }; |
| @@ -382,6 +384,8 @@ struct svc_procedure { | |||
| 382 | */ | 384 | */ |
| 383 | struct svc_serv * svc_create(struct svc_program *, unsigned int, | 385 | struct svc_serv * svc_create(struct svc_program *, unsigned int, |
| 384 | void (*shutdown)(struct svc_serv*)); | 386 | void (*shutdown)(struct svc_serv*)); |
| 387 | struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, | ||
| 388 | struct svc_pool *pool); | ||
| 385 | int svc_create_thread(svc_thread_fn, struct svc_serv *); | 389 | int svc_create_thread(svc_thread_fn, struct svc_serv *); |
| 386 | void svc_exit_thread(struct svc_rqst *); | 390 | void svc_exit_thread(struct svc_rqst *); |
| 387 | struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, | 391 | struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, |
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h new file mode 100644 index 000000000000..c11bbcc081f9 --- /dev/null +++ b/include/linux/sunrpc/svc_rdma.h | |||
| @@ -0,0 +1,262 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is available to you under a choice of one of two | ||
| 5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
| 6 | * General Public License (GPL) Version 2, available from the file | ||
| 7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
| 8 | * license below: | ||
| 9 | * | ||
| 10 | * Redistribution and use in source and binary forms, with or without | ||
| 11 | * modification, are permitted provided that the following conditions | ||
| 12 | * are met: | ||
| 13 | * | ||
| 14 | * Redistributions of source code must retain the above copyright | ||
| 15 | * notice, this list of conditions and the following disclaimer. | ||
| 16 | * | ||
| 17 | * Redistributions in binary form must reproduce the above | ||
| 18 | * copyright notice, this list of conditions and the following | ||
| 19 | * disclaimer in the documentation and/or other materials provided | ||
| 20 | * with the distribution. | ||
| 21 | * | ||
| 22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
| 23 | * its contributors may be used to endorse or promote products | ||
| 24 | * derived from this software without specific prior written | ||
| 25 | * permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | * | ||
| 39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
| 40 | */ | ||
| 41 | |||
| 42 | #ifndef SVC_RDMA_H | ||
| 43 | #define SVC_RDMA_H | ||
| 44 | #include <linux/sunrpc/xdr.h> | ||
| 45 | #include <linux/sunrpc/svcsock.h> | ||
| 46 | #include <linux/sunrpc/rpc_rdma.h> | ||
| 47 | #include <rdma/ib_verbs.h> | ||
| 48 | #include <rdma/rdma_cm.h> | ||
| 49 | #define SVCRDMA_DEBUG | ||
| 50 | |||
| 51 | /* RPC/RDMA parameters and stats */ | ||
| 52 | extern unsigned int svcrdma_ord; | ||
| 53 | extern unsigned int svcrdma_max_requests; | ||
| 54 | extern unsigned int svcrdma_max_req_size; | ||
| 55 | |||
| 56 | extern atomic_t rdma_stat_recv; | ||
| 57 | extern atomic_t rdma_stat_read; | ||
| 58 | extern atomic_t rdma_stat_write; | ||
| 59 | extern atomic_t rdma_stat_sq_starve; | ||
| 60 | extern atomic_t rdma_stat_rq_starve; | ||
| 61 | extern atomic_t rdma_stat_rq_poll; | ||
| 62 | extern atomic_t rdma_stat_rq_prod; | ||
| 63 | extern atomic_t rdma_stat_sq_poll; | ||
| 64 | extern atomic_t rdma_stat_sq_prod; | ||
| 65 | |||
| 66 | #define RPCRDMA_VERSION 1 | ||
| 67 | |||
| 68 | /* | ||
| 69 | * Contexts are built when an RDMA request is created and are a | ||
| 70 | * record of the resources that can be recovered when the request | ||
| 71 | * completes. | ||
| 72 | */ | ||
| 73 | struct svc_rdma_op_ctxt { | ||
| 74 | struct svc_rdma_op_ctxt *next; | ||
| 75 | struct xdr_buf arg; | ||
| 76 | struct list_head dto_q; | ||
| 77 | enum ib_wr_opcode wr_op; | ||
| 78 | enum ib_wc_status wc_status; | ||
| 79 | u32 byte_len; | ||
| 80 | struct svcxprt_rdma *xprt; | ||
| 81 | unsigned long flags; | ||
| 82 | enum dma_data_direction direction; | ||
| 83 | int count; | ||
| 84 | struct ib_sge sge[RPCSVC_MAXPAGES]; | ||
| 85 | struct page *pages[RPCSVC_MAXPAGES]; | ||
| 86 | }; | ||
| 87 | |||
| 88 | #define RDMACTXT_F_READ_DONE 1 | ||
| 89 | #define RDMACTXT_F_LAST_CTXT 2 | ||
| 90 | |||
| 91 | struct svcxprt_rdma { | ||
| 92 | struct svc_xprt sc_xprt; /* SVC transport structure */ | ||
| 93 | struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ | ||
| 94 | struct list_head sc_accept_q; /* Conn. waiting accept */ | ||
| 95 | int sc_ord; /* RDMA read limit */ | ||
| 96 | wait_queue_head_t sc_read_wait; | ||
| 97 | int sc_max_sge; | ||
| 98 | |||
| 99 | int sc_sq_depth; /* Depth of SQ */ | ||
| 100 | atomic_t sc_sq_count; /* Number of SQ WR on queue */ | ||
| 101 | |||
| 102 | int sc_max_requests; /* Depth of RQ */ | ||
| 103 | int sc_max_req_size; /* Size of each RQ WR buf */ | ||
| 104 | |||
| 105 | struct ib_pd *sc_pd; | ||
| 106 | |||
| 107 | struct svc_rdma_op_ctxt *sc_ctxt_head; | ||
| 108 | int sc_ctxt_cnt; | ||
| 109 | int sc_ctxt_bump; | ||
| 110 | int sc_ctxt_max; | ||
| 111 | spinlock_t sc_ctxt_lock; | ||
| 112 | struct list_head sc_rq_dto_q; | ||
| 113 | spinlock_t sc_rq_dto_lock; | ||
| 114 | struct ib_qp *sc_qp; | ||
| 115 | struct ib_cq *sc_rq_cq; | ||
| 116 | struct ib_cq *sc_sq_cq; | ||
| 117 | struct ib_mr *sc_phys_mr; /* MR for server memory */ | ||
| 118 | |||
| 119 | spinlock_t sc_lock; /* transport lock */ | ||
| 120 | |||
| 121 | wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */ | ||
| 122 | unsigned long sc_flags; | ||
| 123 | struct list_head sc_dto_q; /* DTO tasklet I/O pending Q */ | ||
| 124 | struct list_head sc_read_complete_q; | ||
| 125 | spinlock_t sc_read_complete_lock; | ||
| 126 | }; | ||
| 127 | /* sc_flags */ | ||
| 128 | #define RDMAXPRT_RQ_PENDING 1 | ||
| 129 | #define RDMAXPRT_SQ_PENDING 2 | ||
| 130 | #define RDMAXPRT_CONN_PENDING 3 | ||
| 131 | |||
| 132 | #define RPCRDMA_LISTEN_BACKLOG 10 | ||
| 133 | /* The default ORD value is based on two outstanding full-size writes with a | ||
| 134 | * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ | ||
| 135 | #define RPCRDMA_ORD (64/4) | ||
| 136 | #define RPCRDMA_SQ_DEPTH_MULT 8 | ||
| 137 | #define RPCRDMA_MAX_THREADS 16 | ||
| 138 | #define RPCRDMA_MAX_REQUESTS 16 | ||
| 139 | #define RPCRDMA_MAX_REQ_SIZE 4096 | ||
| 140 | |||
| 141 | /* svc_rdma_marshal.c */ | ||
| 142 | extern void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *, | ||
| 143 | int *, int *); | ||
| 144 | extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); | ||
| 145 | extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *); | ||
| 146 | extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, | ||
| 147 | struct rpcrdma_msg *, | ||
| 148 | enum rpcrdma_errcode, u32 *); | ||
| 149 | extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); | ||
| 150 | extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); | ||
| 151 | extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, | ||
| 152 | u32, u64, u32); | ||
| 153 | extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, | ||
| 154 | struct rpcrdma_msg *, | ||
| 155 | struct rpcrdma_msg *, | ||
| 156 | enum rpcrdma_proc); | ||
| 157 | extern int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *); | ||
| 158 | |||
| 159 | /* svc_rdma_recvfrom.c */ | ||
| 160 | extern int svc_rdma_recvfrom(struct svc_rqst *); | ||
| 161 | |||
| 162 | /* svc_rdma_sendto.c */ | ||
| 163 | extern int svc_rdma_sendto(struct svc_rqst *); | ||
| 164 | |||
| 165 | /* svc_rdma_transport.c */ | ||
| 166 | extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); | ||
| 167 | extern int svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, | ||
| 168 | enum rpcrdma_errcode); | ||
| 169 | struct page *svc_rdma_get_page(void); | ||
| 170 | extern int svc_rdma_post_recv(struct svcxprt_rdma *); | ||
| 171 | extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); | ||
| 172 | extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); | ||
| 173 | extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); | ||
| 174 | extern void svc_sq_reap(struct svcxprt_rdma *); | ||
| 175 | extern void svc_rq_reap(struct svcxprt_rdma *); | ||
| 176 | extern struct svc_xprt_class svc_rdma_class; | ||
| 177 | extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); | ||
| 178 | |||
| 179 | /* svc_rdma.c */ | ||
| 180 | extern int svc_rdma_init(void); | ||
| 181 | extern void svc_rdma_cleanup(void); | ||
| 182 | |||
| 183 | /* | ||
| 184 | * Returns the address of the first read chunk or <nul> if no read chunk is | ||
| 185 | * present | ||
| 186 | */ | ||
| 187 | static inline struct rpcrdma_read_chunk * | ||
| 188 | svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) | ||
| 189 | { | ||
| 190 | struct rpcrdma_read_chunk *ch = | ||
| 191 | (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; | ||
| 192 | |||
| 193 | if (ch->rc_discrim == 0) | ||
| 194 | return NULL; | ||
| 195 | |||
| 196 | return ch; | ||
| 197 | } | ||
| 198 | |||
| 199 | /* | ||
| 200 | * Returns the address of the first read write array element or <nul> if no | ||
| 201 | * write array list is present | ||
| 202 | */ | ||
| 203 | static inline struct rpcrdma_write_array * | ||
| 204 | svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) | ||
| 205 | { | ||
| 206 | if (rmsgp->rm_body.rm_chunks[0] != 0 | ||
| 207 | || rmsgp->rm_body.rm_chunks[1] == 0) | ||
| 208 | return NULL; | ||
| 209 | |||
| 210 | return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; | ||
| 211 | } | ||
| 212 | |||
| 213 | /* | ||
| 214 | * Returns the address of the first reply array element or <nul> if no | ||
| 215 | * reply array is present | ||
| 216 | */ | ||
| 217 | static inline struct rpcrdma_write_array * | ||
| 218 | svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) | ||
| 219 | { | ||
| 220 | struct rpcrdma_read_chunk *rch; | ||
| 221 | struct rpcrdma_write_array *wr_ary; | ||
| 222 | struct rpcrdma_write_array *rp_ary; | ||
| 223 | |||
| 224 | /* XXX: Need to fix when reply list may occur with read-list and/or | ||
| 225 | * write list */ | ||
| 226 | if (rmsgp->rm_body.rm_chunks[0] != 0 || | ||
| 227 | rmsgp->rm_body.rm_chunks[1] != 0) | ||
| 228 | return NULL; | ||
| 229 | |||
| 230 | rch = svc_rdma_get_read_chunk(rmsgp); | ||
| 231 | if (rch) { | ||
| 232 | while (rch->rc_discrim) | ||
| 233 | rch++; | ||
| 234 | |||
| 235 | /* The reply list follows an empty write array located | ||
| 236 | * at 'rc_position' here. The reply array is at rc_target. | ||
| 237 | */ | ||
| 238 | rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; | ||
| 239 | |||
| 240 | goto found_it; | ||
| 241 | } | ||
| 242 | |||
| 243 | wr_ary = svc_rdma_get_write_array(rmsgp); | ||
| 244 | if (wr_ary) { | ||
| 245 | rp_ary = (struct rpcrdma_write_array *) | ||
| 246 | &wr_ary-> | ||
| 247 | wc_array[wr_ary->wc_nchunks].wc_target.rs_length; | ||
| 248 | |||
| 249 | goto found_it; | ||
| 250 | } | ||
| 251 | |||
| 252 | /* No read list, no write list */ | ||
| 253 | rp_ary = (struct rpcrdma_write_array *) | ||
| 254 | &rmsgp->rm_body.rm_chunks[2]; | ||
| 255 | |||
| 256 | found_it: | ||
| 257 | if (rp_ary->wc_discrim == 0) | ||
| 258 | return NULL; | ||
| 259 | |||
| 260 | return rp_ary; | ||
| 261 | } | ||
| 262 | #endif | ||
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h new file mode 100644 index 000000000000..6fd7b016517f --- /dev/null +++ b/include/linux/sunrpc/svc_xprt.h | |||
| @@ -0,0 +1,159 @@ | |||
| 1 | /* | ||
| 2 | * linux/include/linux/sunrpc/svc_xprt.h | ||
| 3 | * | ||
| 4 | * RPC server transport I/O | ||
| 5 | */ | ||
| 6 | |||
| 7 | #ifndef SUNRPC_SVC_XPRT_H | ||
| 8 | #define SUNRPC_SVC_XPRT_H | ||
| 9 | |||
| 10 | #include <linux/sunrpc/svc.h> | ||
| 11 | #include <linux/module.h> | ||
| 12 | |||
| 13 | struct svc_xprt_ops { | ||
| 14 | struct svc_xprt *(*xpo_create)(struct svc_serv *, | ||
| 15 | struct sockaddr *, int, | ||
| 16 | int); | ||
| 17 | struct svc_xprt *(*xpo_accept)(struct svc_xprt *); | ||
| 18 | int (*xpo_has_wspace)(struct svc_xprt *); | ||
| 19 | int (*xpo_recvfrom)(struct svc_rqst *); | ||
| 20 | void (*xpo_prep_reply_hdr)(struct svc_rqst *); | ||
| 21 | int (*xpo_sendto)(struct svc_rqst *); | ||
| 22 | void (*xpo_release_rqst)(struct svc_rqst *); | ||
| 23 | void (*xpo_detach)(struct svc_xprt *); | ||
| 24 | void (*xpo_free)(struct svc_xprt *); | ||
| 25 | }; | ||
| 26 | |||
| 27 | struct svc_xprt_class { | ||
| 28 | const char *xcl_name; | ||
| 29 | struct module *xcl_owner; | ||
| 30 | struct svc_xprt_ops *xcl_ops; | ||
| 31 | struct list_head xcl_list; | ||
| 32 | u32 xcl_max_payload; | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct svc_xprt { | ||
| 36 | struct svc_xprt_class *xpt_class; | ||
| 37 | struct svc_xprt_ops *xpt_ops; | ||
| 38 | struct kref xpt_ref; | ||
| 39 | struct list_head xpt_list; | ||
| 40 | struct list_head xpt_ready; | ||
| 41 | unsigned long xpt_flags; | ||
| 42 | #define XPT_BUSY 0 /* enqueued/receiving */ | ||
| 43 | #define XPT_CONN 1 /* conn pending */ | ||
| 44 | #define XPT_CLOSE 2 /* dead or dying */ | ||
| 45 | #define XPT_DATA 3 /* data pending */ | ||
| 46 | #define XPT_TEMP 4 /* connected transport */ | ||
| 47 | #define XPT_DEAD 6 /* transport closed */ | ||
| 48 | #define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */ | ||
| 49 | #define XPT_DEFERRED 8 /* deferred request pending */ | ||
| 50 | #define XPT_OLD 9 /* used for xprt aging mark+sweep */ | ||
| 51 | #define XPT_DETACHED 10 /* detached from tempsocks list */ | ||
| 52 | #define XPT_LISTENER 11 /* listening endpoint */ | ||
| 53 | #define XPT_CACHE_AUTH 12 /* cache auth info */ | ||
| 54 | |||
| 55 | struct svc_pool *xpt_pool; /* current pool iff queued */ | ||
| 56 | struct svc_serv *xpt_server; /* service for transport */ | ||
| 57 | atomic_t xpt_reserved; /* space on outq that is rsvd */ | ||
| 58 | struct mutex xpt_mutex; /* to serialize sending data */ | ||
| 59 | spinlock_t xpt_lock; /* protects sk_deferred | ||
| 60 | * and xpt_auth_cache */ | ||
| 61 | void *xpt_auth_cache;/* auth cache */ | ||
| 62 | struct list_head xpt_deferred; /* deferred requests that need | ||
| 63 | * to be revisted */ | ||
| 64 | struct sockaddr_storage xpt_local; /* local address */ | ||
| 65 | size_t xpt_locallen; /* length of address */ | ||
| 66 | struct sockaddr_storage xpt_remote; /* remote peer's address */ | ||
| 67 | size_t xpt_remotelen; /* length of address */ | ||
| 68 | }; | ||
| 69 | |||
| 70 | int svc_reg_xprt_class(struct svc_xprt_class *); | ||
| 71 | void svc_unreg_xprt_class(struct svc_xprt_class *); | ||
| 72 | void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *, | ||
| 73 | struct svc_serv *); | ||
| 74 | int svc_create_xprt(struct svc_serv *, char *, unsigned short, int); | ||
| 75 | void svc_xprt_enqueue(struct svc_xprt *xprt); | ||
| 76 | void svc_xprt_received(struct svc_xprt *); | ||
| 77 | void svc_xprt_put(struct svc_xprt *xprt); | ||
| 78 | void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); | ||
| 79 | void svc_close_xprt(struct svc_xprt *xprt); | ||
| 80 | void svc_delete_xprt(struct svc_xprt *xprt); | ||
| 81 | int svc_port_is_privileged(struct sockaddr *sin); | ||
| 82 | int svc_print_xprts(char *buf, int maxlen); | ||
| 83 | struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int); | ||
| 84 | int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen); | ||
| 85 | |||
| 86 | static inline void svc_xprt_get(struct svc_xprt *xprt) | ||
| 87 | { | ||
| 88 | kref_get(&xprt->xpt_ref); | ||
| 89 | } | ||
| 90 | static inline void svc_xprt_set_local(struct svc_xprt *xprt, | ||
| 91 | struct sockaddr *sa, int salen) | ||
| 92 | { | ||
| 93 | memcpy(&xprt->xpt_local, sa, salen); | ||
| 94 | xprt->xpt_locallen = salen; | ||
| 95 | } | ||
| 96 | static inline void svc_xprt_set_remote(struct svc_xprt *xprt, | ||
| 97 | struct sockaddr *sa, int salen) | ||
| 98 | { | ||
| 99 | memcpy(&xprt->xpt_remote, sa, salen); | ||
| 100 | xprt->xpt_remotelen = salen; | ||
| 101 | } | ||
| 102 | static inline unsigned short svc_addr_port(struct sockaddr *sa) | ||
| 103 | { | ||
| 104 | unsigned short ret = 0; | ||
| 105 | switch (sa->sa_family) { | ||
| 106 | case AF_INET: | ||
| 107 | ret = ntohs(((struct sockaddr_in *)sa)->sin_port); | ||
| 108 | break; | ||
| 109 | case AF_INET6: | ||
| 110 | ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port); | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | return ret; | ||
| 114 | } | ||
| 115 | |||
| 116 | static inline size_t svc_addr_len(struct sockaddr *sa) | ||
| 117 | { | ||
| 118 | switch (sa->sa_family) { | ||
| 119 | case AF_INET: | ||
| 120 | return sizeof(struct sockaddr_in); | ||
| 121 | case AF_INET6: | ||
| 122 | return sizeof(struct sockaddr_in6); | ||
| 123 | } | ||
| 124 | return -EAFNOSUPPORT; | ||
| 125 | } | ||
| 126 | |||
| 127 | static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt) | ||
| 128 | { | ||
| 129 | return svc_addr_port((struct sockaddr *)&xprt->xpt_local); | ||
| 130 | } | ||
| 131 | |||
| 132 | static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt) | ||
| 133 | { | ||
| 134 | return svc_addr_port((struct sockaddr *)&xprt->xpt_remote); | ||
| 135 | } | ||
| 136 | |||
| 137 | static inline char *__svc_print_addr(struct sockaddr *addr, | ||
| 138 | char *buf, size_t len) | ||
| 139 | { | ||
| 140 | switch (addr->sa_family) { | ||
| 141 | case AF_INET: | ||
| 142 | snprintf(buf, len, "%u.%u.%u.%u, port=%u", | ||
| 143 | NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), | ||
| 144 | ntohs(((struct sockaddr_in *) addr)->sin_port)); | ||
| 145 | break; | ||
| 146 | |||
| 147 | case AF_INET6: | ||
| 148 | snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", | ||
| 149 | NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), | ||
| 150 | ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); | ||
| 151 | break; | ||
| 152 | |||
| 153 | default: | ||
| 154 | snprintf(buf, len, "unknown address type: %d", addr->sa_family); | ||
| 155 | break; | ||
| 156 | } | ||
| 157 | return buf; | ||
| 158 | } | ||
| 159 | #endif /* SUNRPC_SVC_XPRT_H */ | ||
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index a53e0fa855d2..206f092ad4c7 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h | |||
| @@ -10,42 +10,16 @@ | |||
| 10 | #define SUNRPC_SVCSOCK_H | 10 | #define SUNRPC_SVCSOCK_H |
| 11 | 11 | ||
| 12 | #include <linux/sunrpc/svc.h> | 12 | #include <linux/sunrpc/svc.h> |
| 13 | #include <linux/sunrpc/svc_xprt.h> | ||
| 13 | 14 | ||
| 14 | /* | 15 | /* |
| 15 | * RPC server socket. | 16 | * RPC server socket. |
| 16 | */ | 17 | */ |
| 17 | struct svc_sock { | 18 | struct svc_sock { |
| 18 | struct list_head sk_ready; /* list of ready sockets */ | 19 | struct svc_xprt sk_xprt; |
| 19 | struct list_head sk_list; /* list of all sockets */ | ||
| 20 | struct socket * sk_sock; /* berkeley socket layer */ | 20 | struct socket * sk_sock; /* berkeley socket layer */ |
| 21 | struct sock * sk_sk; /* INET layer */ | 21 | struct sock * sk_sk; /* INET layer */ |
| 22 | 22 | ||
| 23 | struct svc_pool * sk_pool; /* current pool iff queued */ | ||
| 24 | struct svc_serv * sk_server; /* service for this socket */ | ||
| 25 | atomic_t sk_inuse; /* use count */ | ||
| 26 | unsigned long sk_flags; | ||
| 27 | #define SK_BUSY 0 /* enqueued/receiving */ | ||
| 28 | #define SK_CONN 1 /* conn pending */ | ||
| 29 | #define SK_CLOSE 2 /* dead or dying */ | ||
| 30 | #define SK_DATA 3 /* data pending */ | ||
| 31 | #define SK_TEMP 4 /* temp (TCP) socket */ | ||
| 32 | #define SK_DEAD 6 /* socket closed */ | ||
| 33 | #define SK_CHNGBUF 7 /* need to change snd/rcv buffer sizes */ | ||
| 34 | #define SK_DEFERRED 8 /* request on sk_deferred */ | ||
| 35 | #define SK_OLD 9 /* used for temp socket aging mark+sweep */ | ||
| 36 | #define SK_DETACHED 10 /* detached from tempsocks list */ | ||
| 37 | |||
| 38 | atomic_t sk_reserved; /* space on outq that is reserved */ | ||
| 39 | |||
| 40 | spinlock_t sk_lock; /* protects sk_deferred and | ||
| 41 | * sk_info_authunix */ | ||
| 42 | struct list_head sk_deferred; /* deferred requests that need to | ||
| 43 | * be revisted */ | ||
| 44 | struct mutex sk_mutex; /* to serialize sending data */ | ||
| 45 | |||
| 46 | int (*sk_recvfrom)(struct svc_rqst *rqstp); | ||
| 47 | int (*sk_sendto)(struct svc_rqst *rqstp); | ||
| 48 | |||
| 49 | /* We keep the old state_change and data_ready CB's here */ | 23 | /* We keep the old state_change and data_ready CB's here */ |
| 50 | void (*sk_ostate)(struct sock *); | 24 | void (*sk_ostate)(struct sock *); |
| 51 | void (*sk_odata)(struct sock *, int bytes); | 25 | void (*sk_odata)(struct sock *, int bytes); |
| @@ -54,21 +28,12 @@ struct svc_sock { | |||
| 54 | /* private TCP part */ | 28 | /* private TCP part */ |
| 55 | int sk_reclen; /* length of record */ | 29 | int sk_reclen; /* length of record */ |
| 56 | int sk_tcplen; /* current read length */ | 30 | int sk_tcplen; /* current read length */ |
| 57 | time_t sk_lastrecv; /* time of last received request */ | ||
| 58 | |||
| 59 | /* cache of various info for TCP sockets */ | ||
| 60 | void *sk_info_authunix; | ||
| 61 | |||
| 62 | struct sockaddr_storage sk_local; /* local address */ | ||
| 63 | struct sockaddr_storage sk_remote; /* remote peer's address */ | ||
| 64 | int sk_remotelen; /* length of address */ | ||
| 65 | }; | 31 | }; |
| 66 | 32 | ||
| 67 | /* | 33 | /* |
| 68 | * Function prototypes. | 34 | * Function prototypes. |
| 69 | */ | 35 | */ |
| 70 | int svc_makesock(struct svc_serv *, int, unsigned short, int flags); | 36 | void svc_close_all(struct list_head *); |
| 71 | void svc_force_close_socket(struct svc_sock *); | ||
| 72 | int svc_recv(struct svc_rqst *, long); | 37 | int svc_recv(struct svc_rqst *, long); |
| 73 | int svc_send(struct svc_rqst *); | 38 | int svc_send(struct svc_rqst *); |
| 74 | void svc_drop(struct svc_rqst *); | 39 | void svc_drop(struct svc_rqst *); |
| @@ -78,6 +43,8 @@ int svc_addsock(struct svc_serv *serv, | |||
| 78 | int fd, | 43 | int fd, |
| 79 | char *name_return, | 44 | char *name_return, |
| 80 | int *proto); | 45 | int *proto); |
| 46 | void svc_init_xprt_sock(void); | ||
| 47 | void svc_cleanup_xprt_sock(void); | ||
| 81 | 48 | ||
| 82 | /* | 49 | /* |
| 83 | * svc_makesock socket characteristics | 50 | * svc_makesock socket characteristics |
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 0751c9464d0f..e4057d729f03 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h | |||
| @@ -112,7 +112,8 @@ struct xdr_buf { | |||
| 112 | __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); | 112 | __be32 *xdr_encode_opaque_fixed(__be32 *p, const void *ptr, unsigned int len); |
| 113 | __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); | 113 | __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int len); |
| 114 | __be32 *xdr_encode_string(__be32 *p, const char *s); | 114 | __be32 *xdr_encode_string(__be32 *p, const char *s); |
| 115 | __be32 *xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen); | 115 | __be32 *xdr_decode_string_inplace(__be32 *p, char **sp, unsigned int *lenp, |
| 116 | unsigned int maxlen); | ||
| 116 | __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); | 117 | __be32 *xdr_encode_netobj(__be32 *p, const struct xdr_netobj *); |
| 117 | __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); | 118 | __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *); |
| 118 | 119 | ||
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 5c69a725e530..92e1dbe50947 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
| @@ -11,6 +11,7 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ | |||
| 11 | auth.o auth_null.o auth_unix.o \ | 11 | auth.o auth_null.o auth_unix.o \ |
| 12 | svc.o svcsock.o svcauth.o svcauth_unix.o \ | 12 | svc.o svcsock.o svcauth.o svcauth_unix.o \ |
| 13 | rpcb_clnt.o timer.o xdr.o \ | 13 | rpcb_clnt.o timer.o xdr.o \ |
| 14 | sunrpc_syms.o cache.o rpc_pipe.o | 14 | sunrpc_syms.o cache.o rpc_pipe.o \ |
| 15 | svc_xprt.o | ||
| 15 | sunrpc-$(CONFIG_PROC_FS) += stats.o | 16 | sunrpc-$(CONFIG_PROC_FS) += stats.o |
| 16 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o | 17 | sunrpc-$(CONFIG_SYSCTL) += sysctl.o |
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 73940df6c460..481f984e9a22 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
| @@ -224,38 +224,34 @@ static int rsi_parse(struct cache_detail *cd, | |||
| 224 | 224 | ||
| 225 | /* major/minor */ | 225 | /* major/minor */ |
| 226 | len = qword_get(&mesg, buf, mlen); | 226 | len = qword_get(&mesg, buf, mlen); |
| 227 | if (len < 0) | 227 | if (len <= 0) |
| 228 | goto out; | 228 | goto out; |
| 229 | if (len == 0) { | 229 | rsii.major_status = simple_strtoul(buf, &ep, 10); |
| 230 | if (*ep) | ||
| 231 | goto out; | ||
| 232 | len = qword_get(&mesg, buf, mlen); | ||
| 233 | if (len <= 0) | ||
| 234 | goto out; | ||
| 235 | rsii.minor_status = simple_strtoul(buf, &ep, 10); | ||
| 236 | if (*ep) | ||
| 230 | goto out; | 237 | goto out; |
| 231 | } else { | ||
| 232 | rsii.major_status = simple_strtoul(buf, &ep, 10); | ||
| 233 | if (*ep) | ||
| 234 | goto out; | ||
| 235 | len = qword_get(&mesg, buf, mlen); | ||
| 236 | if (len <= 0) | ||
| 237 | goto out; | ||
| 238 | rsii.minor_status = simple_strtoul(buf, &ep, 10); | ||
| 239 | if (*ep) | ||
| 240 | goto out; | ||
| 241 | 238 | ||
| 242 | /* out_handle */ | 239 | /* out_handle */ |
| 243 | len = qword_get(&mesg, buf, mlen); | 240 | len = qword_get(&mesg, buf, mlen); |
| 244 | if (len < 0) | 241 | if (len < 0) |
| 245 | goto out; | 242 | goto out; |
| 246 | status = -ENOMEM; | 243 | status = -ENOMEM; |
| 247 | if (dup_to_netobj(&rsii.out_handle, buf, len)) | 244 | if (dup_to_netobj(&rsii.out_handle, buf, len)) |
| 248 | goto out; | 245 | goto out; |
| 249 | 246 | ||
| 250 | /* out_token */ | 247 | /* out_token */ |
| 251 | len = qword_get(&mesg, buf, mlen); | 248 | len = qword_get(&mesg, buf, mlen); |
| 252 | status = -EINVAL; | 249 | status = -EINVAL; |
| 253 | if (len < 0) | 250 | if (len < 0) |
| 254 | goto out; | 251 | goto out; |
| 255 | status = -ENOMEM; | 252 | status = -ENOMEM; |
| 256 | if (dup_to_netobj(&rsii.out_token, buf, len)) | 253 | if (dup_to_netobj(&rsii.out_token, buf, len)) |
| 257 | goto out; | 254 | goto out; |
| 258 | } | ||
| 259 | rsii.h.expiry_time = expiry; | 255 | rsii.h.expiry_time = expiry; |
| 260 | rsip = rsi_update(&rsii, rsip); | 256 | rsip = rsi_update(&rsii, rsip); |
| 261 | status = 0; | 257 | status = 0; |
| @@ -975,6 +971,7 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, | |||
| 975 | struct kvec *resv = &rqstp->rq_res.head[0]; | 971 | struct kvec *resv = &rqstp->rq_res.head[0]; |
| 976 | struct xdr_netobj tmpobj; | 972 | struct xdr_netobj tmpobj; |
| 977 | struct rsi *rsip, rsikey; | 973 | struct rsi *rsip, rsikey; |
| 974 | int ret; | ||
| 978 | 975 | ||
| 979 | /* Read the verifier; should be NULL: */ | 976 | /* Read the verifier; should be NULL: */ |
| 980 | *authp = rpc_autherr_badverf; | 977 | *authp = rpc_autherr_badverf; |
| @@ -1014,23 +1011,27 @@ static int svcauth_gss_handle_init(struct svc_rqst *rqstp, | |||
| 1014 | /* No upcall result: */ | 1011 | /* No upcall result: */ |
| 1015 | return SVC_DROP; | 1012 | return SVC_DROP; |
| 1016 | case 0: | 1013 | case 0: |
| 1014 | ret = SVC_DROP; | ||
| 1017 | /* Got an answer to the upcall; use it: */ | 1015 | /* Got an answer to the upcall; use it: */ |
| 1018 | if (gss_write_init_verf(rqstp, rsip)) | 1016 | if (gss_write_init_verf(rqstp, rsip)) |
| 1019 | return SVC_DROP; | 1017 | goto out; |
| 1020 | if (resv->iov_len + 4 > PAGE_SIZE) | 1018 | if (resv->iov_len + 4 > PAGE_SIZE) |
| 1021 | return SVC_DROP; | 1019 | goto out; |
| 1022 | svc_putnl(resv, RPC_SUCCESS); | 1020 | svc_putnl(resv, RPC_SUCCESS); |
| 1023 | if (svc_safe_putnetobj(resv, &rsip->out_handle)) | 1021 | if (svc_safe_putnetobj(resv, &rsip->out_handle)) |
| 1024 | return SVC_DROP; | 1022 | goto out; |
| 1025 | if (resv->iov_len + 3 * 4 > PAGE_SIZE) | 1023 | if (resv->iov_len + 3 * 4 > PAGE_SIZE) |
| 1026 | return SVC_DROP; | 1024 | goto out; |
| 1027 | svc_putnl(resv, rsip->major_status); | 1025 | svc_putnl(resv, rsip->major_status); |
| 1028 | svc_putnl(resv, rsip->minor_status); | 1026 | svc_putnl(resv, rsip->minor_status); |
| 1029 | svc_putnl(resv, GSS_SEQ_WIN); | 1027 | svc_putnl(resv, GSS_SEQ_WIN); |
| 1030 | if (svc_safe_putnetobj(resv, &rsip->out_token)) | 1028 | if (svc_safe_putnetobj(resv, &rsip->out_token)) |
| 1031 | return SVC_DROP; | 1029 | goto out; |
| 1032 | } | 1030 | } |
| 1033 | return SVC_COMPLETE; | 1031 | ret = SVC_COMPLETE; |
| 1032 | out: | ||
| 1033 | cache_put(&rsip->h, &rsi_cache); | ||
| 1034 | return ret; | ||
| 1034 | } | 1035 | } |
| 1035 | 1036 | ||
| 1036 | /* | 1037 | /* |
| @@ -1125,6 +1126,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) | |||
| 1125 | case RPC_GSS_PROC_DESTROY: | 1126 | case RPC_GSS_PROC_DESTROY: |
| 1126 | if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) | 1127 | if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) |
| 1127 | goto auth_err; | 1128 | goto auth_err; |
| 1129 | rsci->h.expiry_time = get_seconds(); | ||
| 1128 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); | 1130 | set_bit(CACHE_NEGATIVE, &rsci->h.flags); |
| 1129 | if (resv->iov_len + 4 > PAGE_SIZE) | 1131 | if (resv->iov_len + 4 > PAGE_SIZE) |
| 1130 | goto drop; | 1132 | goto drop; |
| @@ -1386,19 +1388,26 @@ int | |||
| 1386 | gss_svc_init(void) | 1388 | gss_svc_init(void) |
| 1387 | { | 1389 | { |
| 1388 | int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); | 1390 | int rv = svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); |
| 1389 | if (rv == 0) { | 1391 | if (rv) |
| 1390 | cache_register(&rsc_cache); | 1392 | return rv; |
| 1391 | cache_register(&rsi_cache); | 1393 | rv = cache_register(&rsc_cache); |
| 1392 | } | 1394 | if (rv) |
| 1395 | goto out1; | ||
| 1396 | rv = cache_register(&rsi_cache); | ||
| 1397 | if (rv) | ||
| 1398 | goto out2; | ||
| 1399 | return 0; | ||
| 1400 | out2: | ||
| 1401 | cache_unregister(&rsc_cache); | ||
| 1402 | out1: | ||
| 1403 | svc_auth_unregister(RPC_AUTH_GSS); | ||
| 1393 | return rv; | 1404 | return rv; |
| 1394 | } | 1405 | } |
| 1395 | 1406 | ||
| 1396 | void | 1407 | void |
| 1397 | gss_svc_shutdown(void) | 1408 | gss_svc_shutdown(void) |
| 1398 | { | 1409 | { |
| 1399 | if (cache_unregister(&rsc_cache)) | 1410 | cache_unregister(&rsc_cache); |
| 1400 | printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n"); | 1411 | cache_unregister(&rsi_cache); |
| 1401 | if (cache_unregister(&rsi_cache)) | ||
| 1402 | printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n"); | ||
| 1403 | svc_auth_unregister(RPC_AUTH_GSS); | 1412 | svc_auth_unregister(RPC_AUTH_GSS); |
| 1404 | } | 1413 | } |
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 73f053d0cc7a..636c8e04e0be 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c | |||
| @@ -245,6 +245,7 @@ int cache_check(struct cache_detail *detail, | |||
| 245 | cache_put(h, detail); | 245 | cache_put(h, detail); |
| 246 | return rv; | 246 | return rv; |
| 247 | } | 247 | } |
| 248 | EXPORT_SYMBOL(cache_check); | ||
| 248 | 249 | ||
| 249 | /* | 250 | /* |
| 250 | * caches need to be periodically cleaned. | 251 | * caches need to be periodically cleaned. |
| @@ -290,44 +291,78 @@ static const struct file_operations cache_flush_operations; | |||
| 290 | static void do_cache_clean(struct work_struct *work); | 291 | static void do_cache_clean(struct work_struct *work); |
| 291 | static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); | 292 | static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); |
| 292 | 293 | ||
| 293 | void cache_register(struct cache_detail *cd) | 294 | static void remove_cache_proc_entries(struct cache_detail *cd) |
| 294 | { | 295 | { |
| 295 | cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); | 296 | if (cd->proc_ent == NULL) |
| 296 | if (cd->proc_ent) { | 297 | return; |
| 297 | struct proc_dir_entry *p; | 298 | if (cd->flush_ent) |
| 298 | cd->proc_ent->owner = cd->owner; | 299 | remove_proc_entry("flush", cd->proc_ent); |
| 299 | cd->channel_ent = cd->content_ent = NULL; | 300 | if (cd->channel_ent) |
| 301 | remove_proc_entry("channel", cd->proc_ent); | ||
| 302 | if (cd->content_ent) | ||
| 303 | remove_proc_entry("content", cd->proc_ent); | ||
| 304 | cd->proc_ent = NULL; | ||
| 305 | remove_proc_entry(cd->name, proc_net_rpc); | ||
| 306 | } | ||
| 300 | 307 | ||
| 301 | p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, | 308 | #ifdef CONFIG_PROC_FS |
| 302 | cd->proc_ent); | 309 | static int create_cache_proc_entries(struct cache_detail *cd) |
| 303 | cd->flush_ent = p; | 310 | { |
| 304 | if (p) { | 311 | struct proc_dir_entry *p; |
| 305 | p->proc_fops = &cache_flush_operations; | ||
| 306 | p->owner = cd->owner; | ||
| 307 | p->data = cd; | ||
| 308 | } | ||
| 309 | 312 | ||
| 310 | if (cd->cache_request || cd->cache_parse) { | 313 | cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); |
| 311 | p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, | 314 | if (cd->proc_ent == NULL) |
| 312 | cd->proc_ent); | 315 | goto out_nomem; |
| 313 | cd->channel_ent = p; | 316 | cd->proc_ent->owner = cd->owner; |
| 314 | if (p) { | 317 | cd->channel_ent = cd->content_ent = NULL; |
| 315 | p->proc_fops = &cache_file_operations; | 318 | |
| 316 | p->owner = cd->owner; | 319 | p = create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent); |
| 317 | p->data = cd; | 320 | cd->flush_ent = p; |
| 318 | } | 321 | if (p == NULL) |
| 319 | } | 322 | goto out_nomem; |
| 320 | if (cd->cache_show) { | 323 | p->proc_fops = &cache_flush_operations; |
| 321 | p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR, | 324 | p->owner = cd->owner; |
| 322 | cd->proc_ent); | 325 | p->data = cd; |
| 323 | cd->content_ent = p; | 326 | |
| 324 | if (p) { | 327 | if (cd->cache_request || cd->cache_parse) { |
| 325 | p->proc_fops = &content_file_operations; | 328 | p = create_proc_entry("channel", S_IFREG|S_IRUSR|S_IWUSR, |
| 326 | p->owner = cd->owner; | 329 | cd->proc_ent); |
| 327 | p->data = cd; | 330 | cd->channel_ent = p; |
| 328 | } | 331 | if (p == NULL) |
| 329 | } | 332 | goto out_nomem; |
| 333 | p->proc_fops = &cache_file_operations; | ||
| 334 | p->owner = cd->owner; | ||
| 335 | p->data = cd; | ||
| 330 | } | 336 | } |
| 337 | if (cd->cache_show) { | ||
| 338 | p = create_proc_entry("content", S_IFREG|S_IRUSR|S_IWUSR, | ||
| 339 | cd->proc_ent); | ||
| 340 | cd->content_ent = p; | ||
| 341 | if (p == NULL) | ||
| 342 | goto out_nomem; | ||
| 343 | p->proc_fops = &content_file_operations; | ||
| 344 | p->owner = cd->owner; | ||
| 345 | p->data = cd; | ||
| 346 | } | ||
| 347 | return 0; | ||
| 348 | out_nomem: | ||
| 349 | remove_cache_proc_entries(cd); | ||
| 350 | return -ENOMEM; | ||
| 351 | } | ||
| 352 | #else /* CONFIG_PROC_FS */ | ||
| 353 | static int create_cache_proc_entries(struct cache_detail *cd) | ||
| 354 | { | ||
| 355 | return 0; | ||
| 356 | } | ||
| 357 | #endif | ||
| 358 | |||
| 359 | int cache_register(struct cache_detail *cd) | ||
| 360 | { | ||
| 361 | int ret; | ||
| 362 | |||
| 363 | ret = create_cache_proc_entries(cd); | ||
| 364 | if (ret) | ||
| 365 | return ret; | ||
| 331 | rwlock_init(&cd->hash_lock); | 366 | rwlock_init(&cd->hash_lock); |
| 332 | INIT_LIST_HEAD(&cd->queue); | 367 | INIT_LIST_HEAD(&cd->queue); |
| 333 | spin_lock(&cache_list_lock); | 368 | spin_lock(&cache_list_lock); |
| @@ -341,9 +376,11 @@ void cache_register(struct cache_detail *cd) | |||
| 341 | 376 | ||
| 342 | /* start the cleaning process */ | 377 | /* start the cleaning process */ |
| 343 | schedule_delayed_work(&cache_cleaner, 0); | 378 | schedule_delayed_work(&cache_cleaner, 0); |
| 379 | return 0; | ||
| 344 | } | 380 | } |
| 381 | EXPORT_SYMBOL(cache_register); | ||
| 345 | 382 | ||
| 346 | int cache_unregister(struct cache_detail *cd) | 383 | void cache_unregister(struct cache_detail *cd) |
| 347 | { | 384 | { |
| 348 | cache_purge(cd); | 385 | cache_purge(cd); |
| 349 | spin_lock(&cache_list_lock); | 386 | spin_lock(&cache_list_lock); |
| @@ -351,30 +388,23 @@ int cache_unregister(struct cache_detail *cd) | |||
| 351 | if (cd->entries || atomic_read(&cd->inuse)) { | 388 | if (cd->entries || atomic_read(&cd->inuse)) { |
| 352 | write_unlock(&cd->hash_lock); | 389 | write_unlock(&cd->hash_lock); |
| 353 | spin_unlock(&cache_list_lock); | 390 | spin_unlock(&cache_list_lock); |
| 354 | return -EBUSY; | 391 | goto out; |
| 355 | } | 392 | } |
| 356 | if (current_detail == cd) | 393 | if (current_detail == cd) |
| 357 | current_detail = NULL; | 394 | current_detail = NULL; |
| 358 | list_del_init(&cd->others); | 395 | list_del_init(&cd->others); |
| 359 | write_unlock(&cd->hash_lock); | 396 | write_unlock(&cd->hash_lock); |
| 360 | spin_unlock(&cache_list_lock); | 397 | spin_unlock(&cache_list_lock); |
| 361 | if (cd->proc_ent) { | 398 | remove_cache_proc_entries(cd); |
| 362 | if (cd->flush_ent) | ||
| 363 | remove_proc_entry("flush", cd->proc_ent); | ||
| 364 | if (cd->channel_ent) | ||
| 365 | remove_proc_entry("channel", cd->proc_ent); | ||
| 366 | if (cd->content_ent) | ||
| 367 | remove_proc_entry("content", cd->proc_ent); | ||
| 368 | |||
| 369 | cd->proc_ent = NULL; | ||
| 370 | remove_proc_entry(cd->name, proc_net_rpc); | ||
| 371 | } | ||
| 372 | if (list_empty(&cache_list)) { | 399 | if (list_empty(&cache_list)) { |
| 373 | /* module must be being unloaded so its safe to kill the worker */ | 400 | /* module must be being unloaded so its safe to kill the worker */ |
| 374 | cancel_delayed_work_sync(&cache_cleaner); | 401 | cancel_delayed_work_sync(&cache_cleaner); |
| 375 | } | 402 | } |
| 376 | return 0; | 403 | return; |
| 404 | out: | ||
| 405 | printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); | ||
| 377 | } | 406 | } |
| 407 | EXPORT_SYMBOL(cache_unregister); | ||
| 378 | 408 | ||
| 379 | /* clean cache tries to find something to clean | 409 | /* clean cache tries to find something to clean |
| 380 | * and cleans it. | 410 | * and cleans it. |
| @@ -489,6 +519,7 @@ void cache_flush(void) | |||
| 489 | while (cache_clean() != -1) | 519 | while (cache_clean() != -1) |
| 490 | cond_resched(); | 520 | cond_resched(); |
| 491 | } | 521 | } |
| 522 | EXPORT_SYMBOL(cache_flush); | ||
| 492 | 523 | ||
| 493 | void cache_purge(struct cache_detail *detail) | 524 | void cache_purge(struct cache_detail *detail) |
| 494 | { | 525 | { |
| @@ -497,7 +528,7 @@ void cache_purge(struct cache_detail *detail) | |||
| 497 | cache_flush(); | 528 | cache_flush(); |
| 498 | detail->flush_time = 1; | 529 | detail->flush_time = 1; |
| 499 | } | 530 | } |
| 500 | 531 | EXPORT_SYMBOL(cache_purge); | |
| 501 | 532 | ||
| 502 | 533 | ||
| 503 | /* | 534 | /* |
| @@ -634,13 +665,13 @@ void cache_clean_deferred(void *owner) | |||
| 634 | /* | 665 | /* |
| 635 | * communicate with user-space | 666 | * communicate with user-space |
| 636 | * | 667 | * |
| 637 | * We have a magic /proc file - /proc/sunrpc/cache | 668 | * We have a magic /proc file - /proc/sunrpc/<cachename>/channel. |
| 638 | * On read, you get a full request, or block | 669 | * On read, you get a full request, or block. |
| 639 | * On write, an update request is processed | 670 | * On write, an update request is processed. |
| 640 | * Poll works if anything to read, and always allows write | 671 | * Poll works if anything to read, and always allows write. |
| 641 | * | 672 | * |
| 642 | * Implemented by linked list of requests. Each open file has | 673 | * Implemented by linked list of requests. Each open file has |
| 643 | * a ->private that also exists in this list. New request are added | 674 | * a ->private that also exists in this list. New requests are added |
| 644 | * to the end and may wakeup and preceding readers. | 675 | * to the end and may wakeup and preceding readers. |
| 645 | * New readers are added to the head. If, on read, an item is found with | 676 | * New readers are added to the head. If, on read, an item is found with |
| 646 | * CACHE_UPCALLING clear, we free it from the list. | 677 | * CACHE_UPCALLING clear, we free it from the list. |
| @@ -963,6 +994,7 @@ void qword_add(char **bpp, int *lp, char *str) | |||
| 963 | *bpp = bp; | 994 | *bpp = bp; |
| 964 | *lp = len; | 995 | *lp = len; |
| 965 | } | 996 | } |
| 997 | EXPORT_SYMBOL(qword_add); | ||
| 966 | 998 | ||
| 967 | void qword_addhex(char **bpp, int *lp, char *buf, int blen) | 999 | void qword_addhex(char **bpp, int *lp, char *buf, int blen) |
| 968 | { | 1000 | { |
| @@ -991,6 +1023,7 @@ void qword_addhex(char **bpp, int *lp, char *buf, int blen) | |||
| 991 | *bpp = bp; | 1023 | *bpp = bp; |
| 992 | *lp = len; | 1024 | *lp = len; |
| 993 | } | 1025 | } |
| 1026 | EXPORT_SYMBOL(qword_addhex); | ||
| 994 | 1027 | ||
| 995 | static void warn_no_listener(struct cache_detail *detail) | 1028 | static void warn_no_listener(struct cache_detail *detail) |
| 996 | { | 1029 | { |
| @@ -1113,6 +1146,7 @@ int qword_get(char **bpp, char *dest, int bufsize) | |||
| 1113 | *dest = '\0'; | 1146 | *dest = '\0'; |
| 1114 | return len; | 1147 | return len; |
| 1115 | } | 1148 | } |
| 1149 | EXPORT_SYMBOL(qword_get); | ||
| 1116 | 1150 | ||
| 1117 | 1151 | ||
| 1118 | /* | 1152 | /* |
| @@ -1244,18 +1278,18 @@ static ssize_t read_flush(struct file *file, char __user *buf, | |||
| 1244 | struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; | 1278 | struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; |
| 1245 | char tbuf[20]; | 1279 | char tbuf[20]; |
| 1246 | unsigned long p = *ppos; | 1280 | unsigned long p = *ppos; |
| 1247 | int len; | 1281 | size_t len; |
| 1248 | 1282 | ||
| 1249 | sprintf(tbuf, "%lu\n", cd->flush_time); | 1283 | sprintf(tbuf, "%lu\n", cd->flush_time); |
| 1250 | len = strlen(tbuf); | 1284 | len = strlen(tbuf); |
| 1251 | if (p >= len) | 1285 | if (p >= len) |
| 1252 | return 0; | 1286 | return 0; |
| 1253 | len -= p; | 1287 | len -= p; |
| 1254 | if (len > count) len = count; | 1288 | if (len > count) |
| 1289 | len = count; | ||
| 1255 | if (copy_to_user(buf, (void*)(tbuf+p), len)) | 1290 | if (copy_to_user(buf, (void*)(tbuf+p), len)) |
| 1256 | len = -EFAULT; | 1291 | return -EFAULT; |
| 1257 | else | 1292 | *ppos += len; |
| 1258 | *ppos += len; | ||
| 1259 | return len; | 1293 | return len; |
| 1260 | } | 1294 | } |
| 1261 | 1295 | ||
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 74df2d358e61..5a16875f5ac8 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c | |||
| @@ -33,7 +33,7 @@ struct proc_dir_entry *proc_net_rpc = NULL; | |||
| 33 | static int rpc_proc_show(struct seq_file *seq, void *v) { | 33 | static int rpc_proc_show(struct seq_file *seq, void *v) { |
| 34 | const struct rpc_stat *statp = seq->private; | 34 | const struct rpc_stat *statp = seq->private; |
| 35 | const struct rpc_program *prog = statp->program; | 35 | const struct rpc_program *prog = statp->program; |
| 36 | int i, j; | 36 | unsigned int i, j; |
| 37 | 37 | ||
| 38 | seq_printf(seq, | 38 | seq_printf(seq, |
| 39 | "net %u %u %u %u\n", | 39 | "net %u %u %u %u\n", |
| @@ -81,7 +81,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { | |||
| 81 | const struct svc_program *prog = statp->program; | 81 | const struct svc_program *prog = statp->program; |
| 82 | const struct svc_procedure *proc; | 82 | const struct svc_procedure *proc; |
| 83 | const struct svc_version *vers; | 83 | const struct svc_version *vers; |
| 84 | int i, j; | 84 | unsigned int i, j; |
| 85 | 85 | ||
| 86 | seq_printf(seq, | 86 | seq_printf(seq, |
| 87 | "net %u %u %u %u\n", | 87 | "net %u %u %u %u\n", |
| @@ -106,6 +106,7 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { | |||
| 106 | seq_putc(seq, '\n'); | 106 | seq_putc(seq, '\n'); |
| 107 | } | 107 | } |
| 108 | } | 108 | } |
| 109 | EXPORT_SYMBOL(svc_seq_show); | ||
| 109 | 110 | ||
| 110 | /** | 111 | /** |
| 111 | * rpc_alloc_iostats - allocate an rpc_iostats structure | 112 | * rpc_alloc_iostats - allocate an rpc_iostats structure |
| @@ -255,12 +256,14 @@ svc_proc_register(struct svc_stat *statp, const struct file_operations *fops) | |||
| 255 | { | 256 | { |
| 256 | return do_register(statp->program->pg_name, statp, fops); | 257 | return do_register(statp->program->pg_name, statp, fops); |
| 257 | } | 258 | } |
| 259 | EXPORT_SYMBOL(svc_proc_register); | ||
| 258 | 260 | ||
| 259 | void | 261 | void |
| 260 | svc_proc_unregister(const char *name) | 262 | svc_proc_unregister(const char *name) |
| 261 | { | 263 | { |
| 262 | remove_proc_entry(name, proc_net_rpc); | 264 | remove_proc_entry(name, proc_net_rpc); |
| 263 | } | 265 | } |
| 266 | EXPORT_SYMBOL(svc_proc_unregister); | ||
| 264 | 267 | ||
| 265 | void | 268 | void |
| 266 | rpc_proc_init(void) | 269 | rpc_proc_init(void) |
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 1a7e309d008b..843629f55763 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
| @@ -22,48 +22,6 @@ | |||
| 22 | #include <linux/sunrpc/rpc_pipe_fs.h> | 22 | #include <linux/sunrpc/rpc_pipe_fs.h> |
| 23 | #include <linux/sunrpc/xprtsock.h> | 23 | #include <linux/sunrpc/xprtsock.h> |
| 24 | 24 | ||
| 25 | /* RPC server stuff */ | ||
| 26 | EXPORT_SYMBOL(svc_create); | ||
| 27 | EXPORT_SYMBOL(svc_create_thread); | ||
| 28 | EXPORT_SYMBOL(svc_create_pooled); | ||
| 29 | EXPORT_SYMBOL(svc_set_num_threads); | ||
| 30 | EXPORT_SYMBOL(svc_exit_thread); | ||
| 31 | EXPORT_SYMBOL(svc_destroy); | ||
| 32 | EXPORT_SYMBOL(svc_drop); | ||
| 33 | EXPORT_SYMBOL(svc_process); | ||
| 34 | EXPORT_SYMBOL(svc_recv); | ||
| 35 | EXPORT_SYMBOL(svc_wake_up); | ||
| 36 | EXPORT_SYMBOL(svc_makesock); | ||
| 37 | EXPORT_SYMBOL(svc_reserve); | ||
| 38 | EXPORT_SYMBOL(svc_auth_register); | ||
| 39 | EXPORT_SYMBOL(auth_domain_lookup); | ||
| 40 | EXPORT_SYMBOL(svc_authenticate); | ||
| 41 | EXPORT_SYMBOL(svc_set_client); | ||
| 42 | |||
| 43 | /* RPC statistics */ | ||
| 44 | #ifdef CONFIG_PROC_FS | ||
| 45 | EXPORT_SYMBOL(svc_proc_register); | ||
| 46 | EXPORT_SYMBOL(svc_proc_unregister); | ||
| 47 | EXPORT_SYMBOL(svc_seq_show); | ||
| 48 | #endif | ||
| 49 | |||
| 50 | /* caching... */ | ||
| 51 | EXPORT_SYMBOL(auth_domain_find); | ||
| 52 | EXPORT_SYMBOL(auth_domain_put); | ||
| 53 | EXPORT_SYMBOL(auth_unix_add_addr); | ||
| 54 | EXPORT_SYMBOL(auth_unix_forget_old); | ||
| 55 | EXPORT_SYMBOL(auth_unix_lookup); | ||
| 56 | EXPORT_SYMBOL(cache_check); | ||
| 57 | EXPORT_SYMBOL(cache_flush); | ||
| 58 | EXPORT_SYMBOL(cache_purge); | ||
| 59 | EXPORT_SYMBOL(cache_register); | ||
| 60 | EXPORT_SYMBOL(cache_unregister); | ||
| 61 | EXPORT_SYMBOL(qword_add); | ||
| 62 | EXPORT_SYMBOL(qword_addhex); | ||
| 63 | EXPORT_SYMBOL(qword_get); | ||
| 64 | EXPORT_SYMBOL(svcauth_unix_purge); | ||
| 65 | EXPORT_SYMBOL(unix_domain_find); | ||
| 66 | |||
| 67 | extern struct cache_detail ip_map_cache, unix_gid_cache; | 25 | extern struct cache_detail ip_map_cache, unix_gid_cache; |
| 68 | 26 | ||
| 69 | static int __init | 27 | static int __init |
| @@ -85,7 +43,8 @@ init_sunrpc(void) | |||
| 85 | #endif | 43 | #endif |
| 86 | cache_register(&ip_map_cache); | 44 | cache_register(&ip_map_cache); |
| 87 | cache_register(&unix_gid_cache); | 45 | cache_register(&unix_gid_cache); |
| 88 | init_socket_xprt(); | 46 | svc_init_xprt_sock(); /* svc sock transport */ |
| 47 | init_socket_xprt(); /* clnt sock transport */ | ||
| 89 | rpcauth_init_module(); | 48 | rpcauth_init_module(); |
| 90 | out: | 49 | out: |
| 91 | return err; | 50 | return err; |
| @@ -96,12 +55,11 @@ cleanup_sunrpc(void) | |||
| 96 | { | 55 | { |
| 97 | rpcauth_remove_module(); | 56 | rpcauth_remove_module(); |
| 98 | cleanup_socket_xprt(); | 57 | cleanup_socket_xprt(); |
| 58 | svc_cleanup_xprt_sock(); | ||
| 99 | unregister_rpc_pipefs(); | 59 | unregister_rpc_pipefs(); |
| 100 | rpc_destroy_mempool(); | 60 | rpc_destroy_mempool(); |
| 101 | if (cache_unregister(&ip_map_cache)) | 61 | cache_unregister(&ip_map_cache); |
| 102 | printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); | 62 | cache_unregister(&unix_gid_cache); |
| 103 | if (cache_unregister(&unix_gid_cache)) | ||
| 104 | printk(KERN_ERR "sunrpc: failed to unregister unix_gid cache\n"); | ||
| 105 | #ifdef RPC_DEBUG | 63 | #ifdef RPC_DEBUG |
| 106 | rpc_unregister_sysctl(); | 64 | rpc_unregister_sysctl(); |
| 107 | #endif | 65 | #endif |
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 4ad5fbbb18b4..a290e1523297 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c | |||
| @@ -364,7 +364,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, | |||
| 364 | void (*shutdown)(struct svc_serv *serv)) | 364 | void (*shutdown)(struct svc_serv *serv)) |
| 365 | { | 365 | { |
| 366 | struct svc_serv *serv; | 366 | struct svc_serv *serv; |
| 367 | int vers; | 367 | unsigned int vers; |
| 368 | unsigned int xdrsize; | 368 | unsigned int xdrsize; |
| 369 | unsigned int i; | 369 | unsigned int i; |
| 370 | 370 | ||
| @@ -433,6 +433,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize, | |||
| 433 | { | 433 | { |
| 434 | return __svc_create(prog, bufsize, /*npools*/1, shutdown); | 434 | return __svc_create(prog, bufsize, /*npools*/1, shutdown); |
| 435 | } | 435 | } |
| 436 | EXPORT_SYMBOL(svc_create); | ||
| 436 | 437 | ||
| 437 | struct svc_serv * | 438 | struct svc_serv * |
| 438 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | 439 | svc_create_pooled(struct svc_program *prog, unsigned int bufsize, |
| @@ -452,6 +453,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | |||
| 452 | 453 | ||
| 453 | return serv; | 454 | return serv; |
| 454 | } | 455 | } |
| 456 | EXPORT_SYMBOL(svc_create_pooled); | ||
| 455 | 457 | ||
| 456 | /* | 458 | /* |
| 457 | * Destroy an RPC service. Should be called with the BKL held | 459 | * Destroy an RPC service. Should be called with the BKL held |
| @@ -459,9 +461,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, | |||
| 459 | void | 461 | void |
| 460 | svc_destroy(struct svc_serv *serv) | 462 | svc_destroy(struct svc_serv *serv) |
| 461 | { | 463 | { |
| 462 | struct svc_sock *svsk; | ||
| 463 | struct svc_sock *tmp; | ||
| 464 | |||
| 465 | dprintk("svc: svc_destroy(%s, %d)\n", | 464 | dprintk("svc: svc_destroy(%s, %d)\n", |
| 466 | serv->sv_program->pg_name, | 465 | serv->sv_program->pg_name, |
| 467 | serv->sv_nrthreads); | 466 | serv->sv_nrthreads); |
| @@ -476,14 +475,12 @@ svc_destroy(struct svc_serv *serv) | |||
| 476 | 475 | ||
| 477 | del_timer_sync(&serv->sv_temptimer); | 476 | del_timer_sync(&serv->sv_temptimer); |
| 478 | 477 | ||
| 479 | list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list) | 478 | svc_close_all(&serv->sv_tempsocks); |
| 480 | svc_force_close_socket(svsk); | ||
| 481 | 479 | ||
| 482 | if (serv->sv_shutdown) | 480 | if (serv->sv_shutdown) |
| 483 | serv->sv_shutdown(serv); | 481 | serv->sv_shutdown(serv); |
| 484 | 482 | ||
| 485 | list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list) | 483 | svc_close_all(&serv->sv_permsocks); |
| 486 | svc_force_close_socket(svsk); | ||
| 487 | 484 | ||
| 488 | BUG_ON(!list_empty(&serv->sv_permsocks)); | 485 | BUG_ON(!list_empty(&serv->sv_permsocks)); |
| 489 | BUG_ON(!list_empty(&serv->sv_tempsocks)); | 486 | BUG_ON(!list_empty(&serv->sv_tempsocks)); |
| @@ -498,6 +495,7 @@ svc_destroy(struct svc_serv *serv) | |||
| 498 | kfree(serv->sv_pools); | 495 | kfree(serv->sv_pools); |
| 499 | kfree(serv); | 496 | kfree(serv); |
| 500 | } | 497 | } |
| 498 | EXPORT_SYMBOL(svc_destroy); | ||
| 501 | 499 | ||
| 502 | /* | 500 | /* |
| 503 | * Allocate an RPC server's buffer space. | 501 | * Allocate an RPC server's buffer space. |
| @@ -536,31 +534,17 @@ svc_release_buffer(struct svc_rqst *rqstp) | |||
| 536 | put_page(rqstp->rq_pages[i]); | 534 | put_page(rqstp->rq_pages[i]); |
| 537 | } | 535 | } |
| 538 | 536 | ||
| 539 | /* | 537 | struct svc_rqst * |
| 540 | * Create a thread in the given pool. Caller must hold BKL. | 538 | svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool) |
| 541 | * On a NUMA or SMP machine, with a multi-pool serv, the thread | ||
| 542 | * will be restricted to run on the cpus belonging to the pool. | ||
| 543 | */ | ||
| 544 | static int | ||
| 545 | __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | ||
| 546 | struct svc_pool *pool) | ||
| 547 | { | 539 | { |
| 548 | struct svc_rqst *rqstp; | 540 | struct svc_rqst *rqstp; |
| 549 | int error = -ENOMEM; | ||
| 550 | int have_oldmask = 0; | ||
| 551 | cpumask_t oldmask; | ||
| 552 | 541 | ||
| 553 | rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); | 542 | rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL); |
| 554 | if (!rqstp) | 543 | if (!rqstp) |
| 555 | goto out; | 544 | goto out_enomem; |
| 556 | 545 | ||
| 557 | init_waitqueue_head(&rqstp->rq_wait); | 546 | init_waitqueue_head(&rqstp->rq_wait); |
| 558 | 547 | ||
| 559 | if (!(rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) | ||
| 560 | || !(rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL)) | ||
| 561 | || !svc_init_buffer(rqstp, serv->sv_max_mesg)) | ||
| 562 | goto out_thread; | ||
| 563 | |||
| 564 | serv->sv_nrthreads++; | 548 | serv->sv_nrthreads++; |
| 565 | spin_lock_bh(&pool->sp_lock); | 549 | spin_lock_bh(&pool->sp_lock); |
| 566 | pool->sp_nrthreads++; | 550 | pool->sp_nrthreads++; |
| @@ -569,6 +553,45 @@ __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | |||
| 569 | rqstp->rq_server = serv; | 553 | rqstp->rq_server = serv; |
| 570 | rqstp->rq_pool = pool; | 554 | rqstp->rq_pool = pool; |
| 571 | 555 | ||
| 556 | rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); | ||
| 557 | if (!rqstp->rq_argp) | ||
| 558 | goto out_thread; | ||
| 559 | |||
| 560 | rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL); | ||
| 561 | if (!rqstp->rq_resp) | ||
| 562 | goto out_thread; | ||
| 563 | |||
| 564 | if (!svc_init_buffer(rqstp, serv->sv_max_mesg)) | ||
| 565 | goto out_thread; | ||
| 566 | |||
| 567 | return rqstp; | ||
| 568 | out_thread: | ||
| 569 | svc_exit_thread(rqstp); | ||
| 570 | out_enomem: | ||
| 571 | return ERR_PTR(-ENOMEM); | ||
| 572 | } | ||
| 573 | EXPORT_SYMBOL(svc_prepare_thread); | ||
| 574 | |||
| 575 | /* | ||
| 576 | * Create a thread in the given pool. Caller must hold BKL. | ||
| 577 | * On a NUMA or SMP machine, with a multi-pool serv, the thread | ||
| 578 | * will be restricted to run on the cpus belonging to the pool. | ||
| 579 | */ | ||
| 580 | static int | ||
| 581 | __svc_create_thread(svc_thread_fn func, struct svc_serv *serv, | ||
| 582 | struct svc_pool *pool) | ||
| 583 | { | ||
| 584 | struct svc_rqst *rqstp; | ||
| 585 | int error = -ENOMEM; | ||
| 586 | int have_oldmask = 0; | ||
| 587 | cpumask_t oldmask; | ||
| 588 | |||
| 589 | rqstp = svc_prepare_thread(serv, pool); | ||
| 590 | if (IS_ERR(rqstp)) { | ||
| 591 | error = PTR_ERR(rqstp); | ||
| 592 | goto out; | ||
| 593 | } | ||
| 594 | |||
| 572 | if (serv->sv_nrpools > 1) | 595 | if (serv->sv_nrpools > 1) |
| 573 | have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); | 596 | have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask); |
| 574 | 597 | ||
| @@ -597,6 +620,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv) | |||
| 597 | { | 620 | { |
| 598 | return __svc_create_thread(func, serv, &serv->sv_pools[0]); | 621 | return __svc_create_thread(func, serv, &serv->sv_pools[0]); |
| 599 | } | 622 | } |
| 623 | EXPORT_SYMBOL(svc_create_thread); | ||
| 600 | 624 | ||
| 601 | /* | 625 | /* |
| 602 | * Choose a pool in which to create a new thread, for svc_set_num_threads | 626 | * Choose a pool in which to create a new thread, for svc_set_num_threads |
| @@ -700,6 +724,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) | |||
| 700 | 724 | ||
| 701 | return error; | 725 | return error; |
| 702 | } | 726 | } |
| 727 | EXPORT_SYMBOL(svc_set_num_threads); | ||
| 703 | 728 | ||
| 704 | /* | 729 | /* |
| 705 | * Called from a server thread as it's exiting. Caller must hold BKL. | 730 | * Called from a server thread as it's exiting. Caller must hold BKL. |
| @@ -726,6 +751,7 @@ svc_exit_thread(struct svc_rqst *rqstp) | |||
| 726 | if (serv) | 751 | if (serv) |
| 727 | svc_destroy(serv); | 752 | svc_destroy(serv); |
| 728 | } | 753 | } |
| 754 | EXPORT_SYMBOL(svc_exit_thread); | ||
| 729 | 755 | ||
| 730 | /* | 756 | /* |
| 731 | * Register an RPC service with the local portmapper. | 757 | * Register an RPC service with the local portmapper. |
| @@ -737,7 +763,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) | |||
| 737 | { | 763 | { |
| 738 | struct svc_program *progp; | 764 | struct svc_program *progp; |
| 739 | unsigned long flags; | 765 | unsigned long flags; |
| 740 | int i, error = 0, dummy; | 766 | unsigned int i; |
| 767 | int error = 0, dummy; | ||
| 741 | 768 | ||
| 742 | if (!port) | 769 | if (!port) |
| 743 | clear_thread_flag(TIF_SIGPENDING); | 770 | clear_thread_flag(TIF_SIGPENDING); |
| @@ -840,9 +867,9 @@ svc_process(struct svc_rqst *rqstp) | |||
| 840 | rqstp->rq_res.tail[0].iov_len = 0; | 867 | rqstp->rq_res.tail[0].iov_len = 0; |
| 841 | /* Will be turned off only in gss privacy case: */ | 868 | /* Will be turned off only in gss privacy case: */ |
| 842 | rqstp->rq_splice_ok = 1; | 869 | rqstp->rq_splice_ok = 1; |
| 843 | /* tcp needs a space for the record length... */ | 870 | |
| 844 | if (rqstp->rq_prot == IPPROTO_TCP) | 871 | /* Setup reply header */ |
| 845 | svc_putnl(resv, 0); | 872 | rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); |
| 846 | 873 | ||
| 847 | rqstp->rq_xid = svc_getu32(argv); | 874 | rqstp->rq_xid = svc_getu32(argv); |
| 848 | svc_putu32(resv, rqstp->rq_xid); | 875 | svc_putu32(resv, rqstp->rq_xid); |
| @@ -1049,16 +1076,15 @@ err_bad: | |||
| 1049 | svc_putnl(resv, ntohl(rpc_stat)); | 1076 | svc_putnl(resv, ntohl(rpc_stat)); |
| 1050 | goto sendit; | 1077 | goto sendit; |
| 1051 | } | 1078 | } |
| 1079 | EXPORT_SYMBOL(svc_process); | ||
| 1052 | 1080 | ||
| 1053 | /* | 1081 | /* |
| 1054 | * Return (transport-specific) limit on the rpc payload. | 1082 | * Return (transport-specific) limit on the rpc payload. |
| 1055 | */ | 1083 | */ |
| 1056 | u32 svc_max_payload(const struct svc_rqst *rqstp) | 1084 | u32 svc_max_payload(const struct svc_rqst *rqstp) |
| 1057 | { | 1085 | { |
| 1058 | int max = RPCSVC_MAXPAYLOAD_TCP; | 1086 | u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload; |
| 1059 | 1087 | ||
| 1060 | if (rqstp->rq_sock->sk_sock->type == SOCK_DGRAM) | ||
| 1061 | max = RPCSVC_MAXPAYLOAD_UDP; | ||
| 1062 | if (rqstp->rq_server->sv_max_payload < max) | 1088 | if (rqstp->rq_server->sv_max_payload < max) |
| 1063 | max = rqstp->rq_server->sv_max_payload; | 1089 | max = rqstp->rq_server->sv_max_payload; |
| 1064 | return max; | 1090 | return max; |
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c new file mode 100644 index 000000000000..ea377e06afae --- /dev/null +++ b/net/sunrpc/svc_xprt.c | |||
| @@ -0,0 +1,1055 @@ | |||
| 1 | /* | ||
| 2 | * linux/net/sunrpc/svc_xprt.c | ||
| 3 | * | ||
| 4 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
| 5 | */ | ||
| 6 | |||
| 7 | #include <linux/sched.h> | ||
| 8 | #include <linux/errno.h> | ||
| 9 | #include <linux/fcntl.h> | ||
| 10 | #include <linux/net.h> | ||
| 11 | #include <linux/in.h> | ||
| 12 | #include <linux/inet.h> | ||
| 13 | #include <linux/udp.h> | ||
| 14 | #include <linux/tcp.h> | ||
| 15 | #include <linux/unistd.h> | ||
| 16 | #include <linux/slab.h> | ||
| 17 | #include <linux/netdevice.h> | ||
| 18 | #include <linux/skbuff.h> | ||
| 19 | #include <linux/file.h> | ||
| 20 | #include <linux/freezer.h> | ||
| 21 | #include <net/sock.h> | ||
| 22 | #include <net/checksum.h> | ||
| 23 | #include <net/ip.h> | ||
| 24 | #include <net/ipv6.h> | ||
| 25 | #include <net/tcp_states.h> | ||
| 26 | #include <linux/uaccess.h> | ||
| 27 | #include <asm/ioctls.h> | ||
| 28 | |||
| 29 | #include <linux/sunrpc/types.h> | ||
| 30 | #include <linux/sunrpc/clnt.h> | ||
| 31 | #include <linux/sunrpc/xdr.h> | ||
| 32 | #include <linux/sunrpc/stats.h> | ||
| 33 | #include <linux/sunrpc/svc_xprt.h> | ||
| 34 | |||
| 35 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
| 36 | |||
| 37 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); | ||
| 38 | static int svc_deferred_recv(struct svc_rqst *rqstp); | ||
| 39 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | ||
| 40 | static void svc_age_temp_xprts(unsigned long closure); | ||
| 41 | |||
| 42 | /* apparently the "standard" is that clients close | ||
| 43 | * idle connections after 5 minutes, servers after | ||
| 44 | * 6 minutes | ||
| 45 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
| 46 | */ | ||
| 47 | static int svc_conn_age_period = 6*60; | ||
| 48 | |||
| 49 | /* List of registered transport classes */ | ||
| 50 | static DEFINE_SPINLOCK(svc_xprt_class_lock); | ||
| 51 | static LIST_HEAD(svc_xprt_class_list); | ||
| 52 | |||
| 53 | /* SMP locking strategy: | ||
| 54 | * | ||
| 55 | * svc_pool->sp_lock protects most of the fields of that pool. | ||
| 56 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
| 57 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
| 58 | * BKL protects svc_serv->sv_nrthread. | ||
| 59 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | ||
| 60 | * and the ->sk_info_authunix cache. | ||
| 61 | * | ||
| 62 | * The XPT_BUSY bit in xprt->xpt_flags prevents a transport being | ||
| 63 | * enqueued multiply. During normal transport processing this bit | ||
| 64 | * is set by svc_xprt_enqueue and cleared by svc_xprt_received. | ||
| 65 | * Providers should not manipulate this bit directly. | ||
| 66 | * | ||
| 67 | * Some flags can be set to certain values at any time | ||
| 68 | * providing that certain rules are followed: | ||
| 69 | * | ||
| 70 | * XPT_CONN, XPT_DATA: | ||
| 71 | * - Can be set or cleared at any time. | ||
| 72 | * - After a set, svc_xprt_enqueue must be called to enqueue | ||
| 73 | * the transport for processing. | ||
| 74 | * - After a clear, the transport must be read/accepted. | ||
| 75 | * If this succeeds, it must be set again. | ||
| 76 | * XPT_CLOSE: | ||
| 77 | * - Can set at any time. It is never cleared. | ||
| 78 | * XPT_DEAD: | ||
| 79 | * - Can only be set while XPT_BUSY is held which ensures | ||
| 80 | * that no other thread will be using the transport or will | ||
| 81 | * try to set XPT_DEAD. | ||
| 82 | */ | ||
| 83 | |||
| 84 | int svc_reg_xprt_class(struct svc_xprt_class *xcl) | ||
| 85 | { | ||
| 86 | struct svc_xprt_class *cl; | ||
| 87 | int res = -EEXIST; | ||
| 88 | |||
| 89 | dprintk("svc: Adding svc transport class '%s'\n", xcl->xcl_name); | ||
| 90 | |||
| 91 | INIT_LIST_HEAD(&xcl->xcl_list); | ||
| 92 | spin_lock(&svc_xprt_class_lock); | ||
| 93 | /* Make sure there isn't already a class with the same name */ | ||
| 94 | list_for_each_entry(cl, &svc_xprt_class_list, xcl_list) { | ||
| 95 | if (strcmp(xcl->xcl_name, cl->xcl_name) == 0) | ||
| 96 | goto out; | ||
| 97 | } | ||
| 98 | list_add_tail(&xcl->xcl_list, &svc_xprt_class_list); | ||
| 99 | res = 0; | ||
| 100 | out: | ||
| 101 | spin_unlock(&svc_xprt_class_lock); | ||
| 102 | return res; | ||
| 103 | } | ||
| 104 | EXPORT_SYMBOL_GPL(svc_reg_xprt_class); | ||
| 105 | |||
| 106 | void svc_unreg_xprt_class(struct svc_xprt_class *xcl) | ||
| 107 | { | ||
| 108 | dprintk("svc: Removing svc transport class '%s'\n", xcl->xcl_name); | ||
| 109 | spin_lock(&svc_xprt_class_lock); | ||
| 110 | list_del_init(&xcl->xcl_list); | ||
| 111 | spin_unlock(&svc_xprt_class_lock); | ||
| 112 | } | ||
| 113 | EXPORT_SYMBOL_GPL(svc_unreg_xprt_class); | ||
| 114 | |||
| 115 | /* | ||
| 116 | * Format the transport list for printing | ||
| 117 | */ | ||
| 118 | int svc_print_xprts(char *buf, int maxlen) | ||
| 119 | { | ||
| 120 | struct list_head *le; | ||
| 121 | char tmpstr[80]; | ||
| 122 | int len = 0; | ||
| 123 | buf[0] = '\0'; | ||
| 124 | |||
| 125 | spin_lock(&svc_xprt_class_lock); | ||
| 126 | list_for_each(le, &svc_xprt_class_list) { | ||
| 127 | int slen; | ||
| 128 | struct svc_xprt_class *xcl = | ||
| 129 | list_entry(le, struct svc_xprt_class, xcl_list); | ||
| 130 | |||
| 131 | sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload); | ||
| 132 | slen = strlen(tmpstr); | ||
| 133 | if (len + slen > maxlen) | ||
| 134 | break; | ||
| 135 | len += slen; | ||
| 136 | strcat(buf, tmpstr); | ||
| 137 | } | ||
| 138 | spin_unlock(&svc_xprt_class_lock); | ||
| 139 | |||
| 140 | return len; | ||
| 141 | } | ||
| 142 | |||
| 143 | static void svc_xprt_free(struct kref *kref) | ||
| 144 | { | ||
| 145 | struct svc_xprt *xprt = | ||
| 146 | container_of(kref, struct svc_xprt, xpt_ref); | ||
| 147 | struct module *owner = xprt->xpt_class->xcl_owner; | ||
| 148 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) | ||
| 149 | && xprt->xpt_auth_cache != NULL) | ||
| 150 | svcauth_unix_info_release(xprt->xpt_auth_cache); | ||
| 151 | xprt->xpt_ops->xpo_free(xprt); | ||
| 152 | module_put(owner); | ||
| 153 | } | ||
| 154 | |||
| 155 | void svc_xprt_put(struct svc_xprt *xprt) | ||
| 156 | { | ||
| 157 | kref_put(&xprt->xpt_ref, svc_xprt_free); | ||
| 158 | } | ||
| 159 | EXPORT_SYMBOL_GPL(svc_xprt_put); | ||
| 160 | |||
| 161 | /* | ||
| 162 | * Called by transport drivers to initialize the transport independent | ||
| 163 | * portion of the transport instance. | ||
| 164 | */ | ||
| 165 | void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, | ||
| 166 | struct svc_serv *serv) | ||
| 167 | { | ||
| 168 | memset(xprt, 0, sizeof(*xprt)); | ||
| 169 | xprt->xpt_class = xcl; | ||
| 170 | xprt->xpt_ops = xcl->xcl_ops; | ||
| 171 | kref_init(&xprt->xpt_ref); | ||
| 172 | xprt->xpt_server = serv; | ||
| 173 | INIT_LIST_HEAD(&xprt->xpt_list); | ||
| 174 | INIT_LIST_HEAD(&xprt->xpt_ready); | ||
| 175 | INIT_LIST_HEAD(&xprt->xpt_deferred); | ||
| 176 | mutex_init(&xprt->xpt_mutex); | ||
| 177 | spin_lock_init(&xprt->xpt_lock); | ||
| 178 | set_bit(XPT_BUSY, &xprt->xpt_flags); | ||
| 179 | } | ||
| 180 | EXPORT_SYMBOL_GPL(svc_xprt_init); | ||
| 181 | |||
| 182 | int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, | ||
| 183 | int flags) | ||
| 184 | { | ||
| 185 | struct svc_xprt_class *xcl; | ||
| 186 | struct sockaddr_in sin = { | ||
| 187 | .sin_family = AF_INET, | ||
| 188 | .sin_addr.s_addr = INADDR_ANY, | ||
| 189 | .sin_port = htons(port), | ||
| 190 | }; | ||
| 191 | dprintk("svc: creating transport %s[%d]\n", xprt_name, port); | ||
| 192 | spin_lock(&svc_xprt_class_lock); | ||
| 193 | list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { | ||
| 194 | struct svc_xprt *newxprt; | ||
| 195 | |||
| 196 | if (strcmp(xprt_name, xcl->xcl_name)) | ||
| 197 | continue; | ||
| 198 | |||
| 199 | if (!try_module_get(xcl->xcl_owner)) | ||
| 200 | goto err; | ||
| 201 | |||
| 202 | spin_unlock(&svc_xprt_class_lock); | ||
| 203 | newxprt = xcl->xcl_ops-> | ||
| 204 | xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin), | ||
| 205 | flags); | ||
| 206 | if (IS_ERR(newxprt)) { | ||
| 207 | module_put(xcl->xcl_owner); | ||
| 208 | return PTR_ERR(newxprt); | ||
| 209 | } | ||
| 210 | |||
| 211 | clear_bit(XPT_TEMP, &newxprt->xpt_flags); | ||
| 212 | spin_lock_bh(&serv->sv_lock); | ||
| 213 | list_add(&newxprt->xpt_list, &serv->sv_permsocks); | ||
| 214 | spin_unlock_bh(&serv->sv_lock); | ||
| 215 | clear_bit(XPT_BUSY, &newxprt->xpt_flags); | ||
| 216 | return svc_xprt_local_port(newxprt); | ||
| 217 | } | ||
| 218 | err: | ||
| 219 | spin_unlock(&svc_xprt_class_lock); | ||
| 220 | dprintk("svc: transport %s not found\n", xprt_name); | ||
| 221 | return -ENOENT; | ||
| 222 | } | ||
| 223 | EXPORT_SYMBOL_GPL(svc_create_xprt); | ||
| 224 | |||
| 225 | /* | ||
| 226 | * Copy the local and remote xprt addresses to the rqstp structure | ||
| 227 | */ | ||
| 228 | void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt) | ||
| 229 | { | ||
| 230 | struct sockaddr *sin; | ||
| 231 | |||
| 232 | memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen); | ||
| 233 | rqstp->rq_addrlen = xprt->xpt_remotelen; | ||
| 234 | |||
| 235 | /* | ||
| 236 | * Destination address in request is needed for binding the | ||
| 237 | * source address in RPC replies/callbacks later. | ||
| 238 | */ | ||
| 239 | sin = (struct sockaddr *)&xprt->xpt_local; | ||
| 240 | switch (sin->sa_family) { | ||
| 241 | case AF_INET: | ||
| 242 | rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; | ||
| 243 | break; | ||
| 244 | case AF_INET6: | ||
| 245 | rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; | ||
| 246 | break; | ||
| 247 | } | ||
| 248 | } | ||
| 249 | EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs); | ||
| 250 | |||
| 251 | /** | ||
| 252 | * svc_print_addr - Format rq_addr field for printing | ||
| 253 | * @rqstp: svc_rqst struct containing address to print | ||
| 254 | * @buf: target buffer for formatted address | ||
| 255 | * @len: length of target buffer | ||
| 256 | * | ||
| 257 | */ | ||
| 258 | char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) | ||
| 259 | { | ||
| 260 | return __svc_print_addr(svc_addr(rqstp), buf, len); | ||
| 261 | } | ||
| 262 | EXPORT_SYMBOL_GPL(svc_print_addr); | ||
| 263 | |||
| 264 | /* | ||
| 265 | * Queue up an idle server thread. Must have pool->sp_lock held. | ||
| 266 | * Note: this is really a stack rather than a queue, so that we only | ||
| 267 | * use as many different threads as we need, and the rest don't pollute | ||
| 268 | * the cache. | ||
| 269 | */ | ||
| 270 | static void svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
| 271 | { | ||
| 272 | list_add(&rqstp->rq_list, &pool->sp_threads); | ||
| 273 | } | ||
| 274 | |||
| 275 | /* | ||
| 276 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | ||
| 277 | */ | ||
| 278 | static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
| 279 | { | ||
| 280 | list_del(&rqstp->rq_list); | ||
| 281 | } | ||
| 282 | |||
| 283 | /* | ||
| 284 | * Queue up a transport with data pending. If there are idle nfsd | ||
| 285 | * processes, wake 'em up. | ||
| 286 | * | ||
| 287 | */ | ||
| 288 | void svc_xprt_enqueue(struct svc_xprt *xprt) | ||
| 289 | { | ||
| 290 | struct svc_serv *serv = xprt->xpt_server; | ||
| 291 | struct svc_pool *pool; | ||
| 292 | struct svc_rqst *rqstp; | ||
| 293 | int cpu; | ||
| 294 | |||
| 295 | if (!(xprt->xpt_flags & | ||
| 296 | ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) | ||
| 297 | return; | ||
| 298 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) | ||
| 299 | return; | ||
| 300 | |||
| 301 | cpu = get_cpu(); | ||
| 302 | pool = svc_pool_for_cpu(xprt->xpt_server, cpu); | ||
| 303 | put_cpu(); | ||
| 304 | |||
| 305 | spin_lock_bh(&pool->sp_lock); | ||
| 306 | |||
| 307 | if (!list_empty(&pool->sp_threads) && | ||
| 308 | !list_empty(&pool->sp_sockets)) | ||
| 309 | printk(KERN_ERR | ||
| 310 | "svc_xprt_enqueue: " | ||
| 311 | "threads and transports both waiting??\n"); | ||
| 312 | |||
| 313 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { | ||
| 314 | /* Don't enqueue dead transports */ | ||
| 315 | dprintk("svc: transport %p is dead, not enqueued\n", xprt); | ||
| 316 | goto out_unlock; | ||
| 317 | } | ||
| 318 | |||
| 319 | /* Mark transport as busy. It will remain in this state until | ||
| 320 | * the provider calls svc_xprt_received. We update XPT_BUSY | ||
| 321 | * atomically because it also guards against trying to enqueue | ||
| 322 | * the transport twice. | ||
| 323 | */ | ||
| 324 | if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { | ||
| 325 | /* Don't enqueue transport while already enqueued */ | ||
| 326 | dprintk("svc: transport %p busy, not enqueued\n", xprt); | ||
| 327 | goto out_unlock; | ||
| 328 | } | ||
| 329 | BUG_ON(xprt->xpt_pool != NULL); | ||
| 330 | xprt->xpt_pool = pool; | ||
| 331 | |||
| 332 | /* Handle pending connection */ | ||
| 333 | if (test_bit(XPT_CONN, &xprt->xpt_flags)) | ||
| 334 | goto process; | ||
| 335 | |||
| 336 | /* Handle close in-progress */ | ||
| 337 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | ||
| 338 | goto process; | ||
| 339 | |||
| 340 | /* Check if we have space to reply to a request */ | ||
| 341 | if (!xprt->xpt_ops->xpo_has_wspace(xprt)) { | ||
| 342 | /* Don't enqueue while not enough space for reply */ | ||
| 343 | dprintk("svc: no write space, transport %p not enqueued\n", | ||
| 344 | xprt); | ||
| 345 | xprt->xpt_pool = NULL; | ||
| 346 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
| 347 | goto out_unlock; | ||
| 348 | } | ||
| 349 | |||
| 350 | process: | ||
| 351 | if (!list_empty(&pool->sp_threads)) { | ||
| 352 | rqstp = list_entry(pool->sp_threads.next, | ||
| 353 | struct svc_rqst, | ||
| 354 | rq_list); | ||
| 355 | dprintk("svc: transport %p served by daemon %p\n", | ||
| 356 | xprt, rqstp); | ||
| 357 | svc_thread_dequeue(pool, rqstp); | ||
| 358 | if (rqstp->rq_xprt) | ||
| 359 | printk(KERN_ERR | ||
| 360 | "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", | ||
| 361 | rqstp, rqstp->rq_xprt); | ||
| 362 | rqstp->rq_xprt = xprt; | ||
| 363 | svc_xprt_get(xprt); | ||
| 364 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
| 365 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
| 366 | BUG_ON(xprt->xpt_pool != pool); | ||
| 367 | wake_up(&rqstp->rq_wait); | ||
| 368 | } else { | ||
| 369 | dprintk("svc: transport %p put into queue\n", xprt); | ||
| 370 | list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); | ||
| 371 | BUG_ON(xprt->xpt_pool != pool); | ||
| 372 | } | ||
| 373 | |||
| 374 | out_unlock: | ||
| 375 | spin_unlock_bh(&pool->sp_lock); | ||
| 376 | } | ||
| 377 | EXPORT_SYMBOL_GPL(svc_xprt_enqueue); | ||
| 378 | |||
| 379 | /* | ||
| 380 | * Dequeue the first transport. Must be called with the pool->sp_lock held. | ||
| 381 | */ | ||
| 382 | static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool) | ||
| 383 | { | ||
| 384 | struct svc_xprt *xprt; | ||
| 385 | |||
| 386 | if (list_empty(&pool->sp_sockets)) | ||
| 387 | return NULL; | ||
| 388 | |||
| 389 | xprt = list_entry(pool->sp_sockets.next, | ||
| 390 | struct svc_xprt, xpt_ready); | ||
| 391 | list_del_init(&xprt->xpt_ready); | ||
| 392 | |||
| 393 | dprintk("svc: transport %p dequeued, inuse=%d\n", | ||
| 394 | xprt, atomic_read(&xprt->xpt_ref.refcount)); | ||
| 395 | |||
| 396 | return xprt; | ||
| 397 | } | ||
| 398 | |||
| 399 | /* | ||
| 400 | * svc_xprt_received conditionally queues the transport for processing | ||
| 401 | * by another thread. The caller must hold the XPT_BUSY bit and must | ||
| 402 | * not thereafter touch transport data. | ||
| 403 | * | ||
| 404 | * Note: XPT_DATA only gets cleared when a read-attempt finds no (or | ||
| 405 | * insufficient) data. | ||
| 406 | */ | ||
| 407 | void svc_xprt_received(struct svc_xprt *xprt) | ||
| 408 | { | ||
| 409 | BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags)); | ||
| 410 | xprt->xpt_pool = NULL; | ||
| 411 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
| 412 | svc_xprt_enqueue(xprt); | ||
| 413 | } | ||
| 414 | EXPORT_SYMBOL_GPL(svc_xprt_received); | ||
| 415 | |||
| 416 | /** | ||
| 417 | * svc_reserve - change the space reserved for the reply to a request. | ||
| 418 | * @rqstp: The request in question | ||
| 419 | * @space: new max space to reserve | ||
| 420 | * | ||
| 421 | * Each request reserves some space on the output queue of the transport | ||
| 422 | * to make sure the reply fits. This function reduces that reserved | ||
| 423 | * space to be the amount of space used already, plus @space. | ||
| 424 | * | ||
| 425 | */ | ||
| 426 | void svc_reserve(struct svc_rqst *rqstp, int space) | ||
| 427 | { | ||
| 428 | space += rqstp->rq_res.head[0].iov_len; | ||
| 429 | |||
| 430 | if (space < rqstp->rq_reserved) { | ||
| 431 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
| 432 | atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); | ||
| 433 | rqstp->rq_reserved = space; | ||
| 434 | |||
| 435 | svc_xprt_enqueue(xprt); | ||
| 436 | } | ||
| 437 | } | ||
| 438 | EXPORT_SYMBOL(svc_reserve); | ||
| 439 | |||
| 440 | static void svc_xprt_release(struct svc_rqst *rqstp) | ||
| 441 | { | ||
| 442 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
| 443 | |||
| 444 | rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); | ||
| 445 | |||
| 446 | svc_free_res_pages(rqstp); | ||
| 447 | rqstp->rq_res.page_len = 0; | ||
| 448 | rqstp->rq_res.page_base = 0; | ||
| 449 | |||
| 450 | /* Reset response buffer and release | ||
| 451 | * the reservation. | ||
| 452 | * But first, check that enough space was reserved | ||
| 453 | * for the reply, otherwise we have a bug! | ||
| 454 | */ | ||
| 455 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | ||
| 456 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | ||
| 457 | rqstp->rq_reserved, | ||
| 458 | rqstp->rq_res.len); | ||
| 459 | |||
| 460 | rqstp->rq_res.head[0].iov_len = 0; | ||
| 461 | svc_reserve(rqstp, 0); | ||
| 462 | rqstp->rq_xprt = NULL; | ||
| 463 | |||
| 464 | svc_xprt_put(xprt); | ||
| 465 | } | ||
| 466 | |||
| 467 | /* | ||
| 468 | * External function to wake up a server waiting for data | ||
| 469 | * This really only makes sense for services like lockd | ||
| 470 | * which have exactly one thread anyway. | ||
| 471 | */ | ||
| 472 | void svc_wake_up(struct svc_serv *serv) | ||
| 473 | { | ||
| 474 | struct svc_rqst *rqstp; | ||
| 475 | unsigned int i; | ||
| 476 | struct svc_pool *pool; | ||
| 477 | |||
| 478 | for (i = 0; i < serv->sv_nrpools; i++) { | ||
| 479 | pool = &serv->sv_pools[i]; | ||
| 480 | |||
| 481 | spin_lock_bh(&pool->sp_lock); | ||
| 482 | if (!list_empty(&pool->sp_threads)) { | ||
| 483 | rqstp = list_entry(pool->sp_threads.next, | ||
| 484 | struct svc_rqst, | ||
| 485 | rq_list); | ||
| 486 | dprintk("svc: daemon %p woken up.\n", rqstp); | ||
| 487 | /* | ||
| 488 | svc_thread_dequeue(pool, rqstp); | ||
| 489 | rqstp->rq_xprt = NULL; | ||
| 490 | */ | ||
| 491 | wake_up(&rqstp->rq_wait); | ||
| 492 | } | ||
| 493 | spin_unlock_bh(&pool->sp_lock); | ||
| 494 | } | ||
| 495 | } | ||
| 496 | EXPORT_SYMBOL(svc_wake_up); | ||
| 497 | |||
| 498 | int svc_port_is_privileged(struct sockaddr *sin) | ||
| 499 | { | ||
| 500 | switch (sin->sa_family) { | ||
| 501 | case AF_INET: | ||
| 502 | return ntohs(((struct sockaddr_in *)sin)->sin_port) | ||
| 503 | < PROT_SOCK; | ||
| 504 | case AF_INET6: | ||
| 505 | return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) | ||
| 506 | < PROT_SOCK; | ||
| 507 | default: | ||
| 508 | return 0; | ||
| 509 | } | ||
| 510 | } | ||
| 511 | |||
| 512 | /* | ||
| 513 | * Make sure that we don't have too many active connections. If we | ||
| 514 | * have, something must be dropped. | ||
| 515 | * | ||
| 516 | * There's no point in trying to do random drop here for DoS | ||
| 517 | * prevention. The NFS clients does 1 reconnect in 15 seconds. An | ||
| 518 | * attacker can easily beat that. | ||
| 519 | * | ||
| 520 | * The only somewhat efficient mechanism would be if drop old | ||
| 521 | * connections from the same IP first. But right now we don't even | ||
| 522 | * record the client IP in svc_sock. | ||
| 523 | */ | ||
| 524 | static void svc_check_conn_limits(struct svc_serv *serv) | ||
| 525 | { | ||
| 526 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | ||
| 527 | struct svc_xprt *xprt = NULL; | ||
| 528 | spin_lock_bh(&serv->sv_lock); | ||
| 529 | if (!list_empty(&serv->sv_tempsocks)) { | ||
| 530 | if (net_ratelimit()) { | ||
| 531 | /* Try to help the admin */ | ||
| 532 | printk(KERN_NOTICE "%s: too many open " | ||
| 533 | "connections, consider increasing the " | ||
| 534 | "number of nfsd threads\n", | ||
| 535 | serv->sv_name); | ||
| 536 | } | ||
| 537 | /* | ||
| 538 | * Always select the oldest connection. It's not fair, | ||
| 539 | * but so is life | ||
| 540 | */ | ||
| 541 | xprt = list_entry(serv->sv_tempsocks.prev, | ||
| 542 | struct svc_xprt, | ||
| 543 | xpt_list); | ||
| 544 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
| 545 | svc_xprt_get(xprt); | ||
| 546 | } | ||
| 547 | spin_unlock_bh(&serv->sv_lock); | ||
| 548 | |||
| 549 | if (xprt) { | ||
| 550 | svc_xprt_enqueue(xprt); | ||
| 551 | svc_xprt_put(xprt); | ||
| 552 | } | ||
| 553 | } | ||
| 554 | } | ||
| 555 | |||
| 556 | /* | ||
| 557 | * Receive the next request on any transport. This code is carefully | ||
| 558 | * organised not to touch any cachelines in the shared svc_serv | ||
| 559 | * structure, only cachelines in the local svc_pool. | ||
| 560 | */ | ||
| 561 | int svc_recv(struct svc_rqst *rqstp, long timeout) | ||
| 562 | { | ||
| 563 | struct svc_xprt *xprt = NULL; | ||
| 564 | struct svc_serv *serv = rqstp->rq_server; | ||
| 565 | struct svc_pool *pool = rqstp->rq_pool; | ||
| 566 | int len, i; | ||
| 567 | int pages; | ||
| 568 | struct xdr_buf *arg; | ||
| 569 | DECLARE_WAITQUEUE(wait, current); | ||
| 570 | |||
| 571 | dprintk("svc: server %p waiting for data (to = %ld)\n", | ||
| 572 | rqstp, timeout); | ||
| 573 | |||
| 574 | if (rqstp->rq_xprt) | ||
| 575 | printk(KERN_ERR | ||
| 576 | "svc_recv: service %p, transport not NULL!\n", | ||
| 577 | rqstp); | ||
| 578 | if (waitqueue_active(&rqstp->rq_wait)) | ||
| 579 | printk(KERN_ERR | ||
| 580 | "svc_recv: service %p, wait queue active!\n", | ||
| 581 | rqstp); | ||
| 582 | |||
| 583 | /* now allocate needed pages. If we get a failure, sleep briefly */ | ||
| 584 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | ||
| 585 | for (i = 0; i < pages ; i++) | ||
| 586 | while (rqstp->rq_pages[i] == NULL) { | ||
| 587 | struct page *p = alloc_page(GFP_KERNEL); | ||
| 588 | if (!p) { | ||
| 589 | int j = msecs_to_jiffies(500); | ||
| 590 | schedule_timeout_uninterruptible(j); | ||
| 591 | } | ||
| 592 | rqstp->rq_pages[i] = p; | ||
| 593 | } | ||
| 594 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | ||
| 595 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
| 596 | |||
| 597 | /* Make arg->head point to first page and arg->pages point to rest */ | ||
| 598 | arg = &rqstp->rq_arg; | ||
| 599 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | ||
| 600 | arg->head[0].iov_len = PAGE_SIZE; | ||
| 601 | arg->pages = rqstp->rq_pages + 1; | ||
| 602 | arg->page_base = 0; | ||
| 603 | /* save at least one page for response */ | ||
| 604 | arg->page_len = (pages-2)*PAGE_SIZE; | ||
| 605 | arg->len = (pages-1)*PAGE_SIZE; | ||
| 606 | arg->tail[0].iov_len = 0; | ||
| 607 | |||
| 608 | try_to_freeze(); | ||
| 609 | cond_resched(); | ||
| 610 | if (signalled()) | ||
| 611 | return -EINTR; | ||
| 612 | |||
| 613 | spin_lock_bh(&pool->sp_lock); | ||
| 614 | xprt = svc_xprt_dequeue(pool); | ||
| 615 | if (xprt) { | ||
| 616 | rqstp->rq_xprt = xprt; | ||
| 617 | svc_xprt_get(xprt); | ||
| 618 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
| 619 | atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); | ||
| 620 | } else { | ||
| 621 | /* No data pending. Go to sleep */ | ||
| 622 | svc_thread_enqueue(pool, rqstp); | ||
| 623 | |||
| 624 | /* | ||
| 625 | * We have to be able to interrupt this wait | ||
| 626 | * to bring down the daemons ... | ||
| 627 | */ | ||
| 628 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 629 | add_wait_queue(&rqstp->rq_wait, &wait); | ||
| 630 | spin_unlock_bh(&pool->sp_lock); | ||
| 631 | |||
| 632 | schedule_timeout(timeout); | ||
| 633 | |||
| 634 | try_to_freeze(); | ||
| 635 | |||
| 636 | spin_lock_bh(&pool->sp_lock); | ||
| 637 | remove_wait_queue(&rqstp->rq_wait, &wait); | ||
| 638 | |||
| 639 | xprt = rqstp->rq_xprt; | ||
| 640 | if (!xprt) { | ||
| 641 | svc_thread_dequeue(pool, rqstp); | ||
| 642 | spin_unlock_bh(&pool->sp_lock); | ||
| 643 | dprintk("svc: server %p, no data yet\n", rqstp); | ||
| 644 | return signalled()? -EINTR : -EAGAIN; | ||
| 645 | } | ||
| 646 | } | ||
| 647 | spin_unlock_bh(&pool->sp_lock); | ||
| 648 | |||
| 649 | len = 0; | ||
| 650 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { | ||
| 651 | dprintk("svc_recv: found XPT_CLOSE\n"); | ||
| 652 | svc_delete_xprt(xprt); | ||
| 653 | } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { | ||
| 654 | struct svc_xprt *newxpt; | ||
| 655 | newxpt = xprt->xpt_ops->xpo_accept(xprt); | ||
| 656 | if (newxpt) { | ||
| 657 | /* | ||
| 658 | * We know this module_get will succeed because the | ||
| 659 | * listener holds a reference too | ||
| 660 | */ | ||
| 661 | __module_get(newxpt->xpt_class->xcl_owner); | ||
| 662 | svc_check_conn_limits(xprt->xpt_server); | ||
| 663 | spin_lock_bh(&serv->sv_lock); | ||
| 664 | set_bit(XPT_TEMP, &newxpt->xpt_flags); | ||
| 665 | list_add(&newxpt->xpt_list, &serv->sv_tempsocks); | ||
| 666 | serv->sv_tmpcnt++; | ||
| 667 | if (serv->sv_temptimer.function == NULL) { | ||
| 668 | /* setup timer to age temp transports */ | ||
| 669 | setup_timer(&serv->sv_temptimer, | ||
| 670 | svc_age_temp_xprts, | ||
| 671 | (unsigned long)serv); | ||
| 672 | mod_timer(&serv->sv_temptimer, | ||
| 673 | jiffies + svc_conn_age_period * HZ); | ||
| 674 | } | ||
| 675 | spin_unlock_bh(&serv->sv_lock); | ||
| 676 | svc_xprt_received(newxpt); | ||
| 677 | } | ||
| 678 | svc_xprt_received(xprt); | ||
| 679 | } else { | ||
| 680 | dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", | ||
| 681 | rqstp, pool->sp_id, xprt, | ||
| 682 | atomic_read(&xprt->xpt_ref.refcount)); | ||
| 683 | rqstp->rq_deferred = svc_deferred_dequeue(xprt); | ||
| 684 | if (rqstp->rq_deferred) { | ||
| 685 | svc_xprt_received(xprt); | ||
| 686 | len = svc_deferred_recv(rqstp); | ||
| 687 | } else | ||
| 688 | len = xprt->xpt_ops->xpo_recvfrom(rqstp); | ||
| 689 | dprintk("svc: got len=%d\n", len); | ||
| 690 | } | ||
| 691 | |||
| 692 | /* No data, incomplete (TCP) read, or accept() */ | ||
| 693 | if (len == 0 || len == -EAGAIN) { | ||
| 694 | rqstp->rq_res.len = 0; | ||
| 695 | svc_xprt_release(rqstp); | ||
| 696 | return -EAGAIN; | ||
| 697 | } | ||
| 698 | clear_bit(XPT_OLD, &xprt->xpt_flags); | ||
| 699 | |||
| 700 | rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); | ||
| 701 | rqstp->rq_chandle.defer = svc_defer; | ||
| 702 | |||
| 703 | if (serv->sv_stats) | ||
| 704 | serv->sv_stats->netcnt++; | ||
| 705 | return len; | ||
| 706 | } | ||
| 707 | EXPORT_SYMBOL(svc_recv); | ||
| 708 | |||
| 709 | /* | ||
| 710 | * Drop request | ||
| 711 | */ | ||
| 712 | void svc_drop(struct svc_rqst *rqstp) | ||
| 713 | { | ||
| 714 | dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); | ||
| 715 | svc_xprt_release(rqstp); | ||
| 716 | } | ||
| 717 | EXPORT_SYMBOL(svc_drop); | ||
| 718 | |||
| 719 | /* | ||
| 720 | * Return reply to client. | ||
| 721 | */ | ||
| 722 | int svc_send(struct svc_rqst *rqstp) | ||
| 723 | { | ||
| 724 | struct svc_xprt *xprt; | ||
| 725 | int len; | ||
| 726 | struct xdr_buf *xb; | ||
| 727 | |||
| 728 | xprt = rqstp->rq_xprt; | ||
| 729 | if (!xprt) | ||
| 730 | return -EFAULT; | ||
| 731 | |||
| 732 | /* release the receive skb before sending the reply */ | ||
| 733 | rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp); | ||
| 734 | |||
| 735 | /* calculate over-all length */ | ||
| 736 | xb = &rqstp->rq_res; | ||
| 737 | xb->len = xb->head[0].iov_len + | ||
| 738 | xb->page_len + | ||
| 739 | xb->tail[0].iov_len; | ||
| 740 | |||
| 741 | /* Grab mutex to serialize outgoing data. */ | ||
| 742 | mutex_lock(&xprt->xpt_mutex); | ||
| 743 | if (test_bit(XPT_DEAD, &xprt->xpt_flags)) | ||
| 744 | len = -ENOTCONN; | ||
| 745 | else | ||
| 746 | len = xprt->xpt_ops->xpo_sendto(rqstp); | ||
| 747 | mutex_unlock(&xprt->xpt_mutex); | ||
| 748 | svc_xprt_release(rqstp); | ||
| 749 | |||
| 750 | if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) | ||
| 751 | return 0; | ||
| 752 | return len; | ||
| 753 | } | ||
| 754 | |||
| 755 | /* | ||
| 756 | * Timer function to close old temporary transports, using | ||
| 757 | * a mark-and-sweep algorithm. | ||
| 758 | */ | ||
| 759 | static void svc_age_temp_xprts(unsigned long closure) | ||
| 760 | { | ||
| 761 | struct svc_serv *serv = (struct svc_serv *)closure; | ||
| 762 | struct svc_xprt *xprt; | ||
| 763 | struct list_head *le, *next; | ||
| 764 | LIST_HEAD(to_be_aged); | ||
| 765 | |||
| 766 | dprintk("svc_age_temp_xprts\n"); | ||
| 767 | |||
| 768 | if (!spin_trylock_bh(&serv->sv_lock)) { | ||
| 769 | /* busy, try again 1 sec later */ | ||
| 770 | dprintk("svc_age_temp_xprts: busy\n"); | ||
| 771 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | ||
| 772 | return; | ||
| 773 | } | ||
| 774 | |||
| 775 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | ||
| 776 | xprt = list_entry(le, struct svc_xprt, xpt_list); | ||
| 777 | |||
| 778 | /* First time through, just mark it OLD. Second time | ||
| 779 | * through, close it. */ | ||
| 780 | if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags)) | ||
| 781 | continue; | ||
| 782 | if (atomic_read(&xprt->xpt_ref.refcount) > 1 | ||
| 783 | || test_bit(XPT_BUSY, &xprt->xpt_flags)) | ||
| 784 | continue; | ||
| 785 | svc_xprt_get(xprt); | ||
| 786 | list_move(le, &to_be_aged); | ||
| 787 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
| 788 | set_bit(XPT_DETACHED, &xprt->xpt_flags); | ||
| 789 | } | ||
| 790 | spin_unlock_bh(&serv->sv_lock); | ||
| 791 | |||
| 792 | while (!list_empty(&to_be_aged)) { | ||
| 793 | le = to_be_aged.next; | ||
| 794 | /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ | ||
| 795 | list_del_init(le); | ||
| 796 | xprt = list_entry(le, struct svc_xprt, xpt_list); | ||
| 797 | |||
| 798 | dprintk("queuing xprt %p for closing\n", xprt); | ||
| 799 | |||
| 800 | /* a thread will dequeue and close it soon */ | ||
| 801 | svc_xprt_enqueue(xprt); | ||
| 802 | svc_xprt_put(xprt); | ||
| 803 | } | ||
| 804 | |||
| 805 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | ||
| 806 | } | ||
| 807 | |||
| 808 | /* | ||
| 809 | * Remove a dead transport | ||
| 810 | */ | ||
| 811 | void svc_delete_xprt(struct svc_xprt *xprt) | ||
| 812 | { | ||
| 813 | struct svc_serv *serv = xprt->xpt_server; | ||
| 814 | |||
| 815 | dprintk("svc: svc_delete_xprt(%p)\n", xprt); | ||
| 816 | xprt->xpt_ops->xpo_detach(xprt); | ||
| 817 | |||
| 818 | spin_lock_bh(&serv->sv_lock); | ||
| 819 | if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags)) | ||
| 820 | list_del_init(&xprt->xpt_list); | ||
| 821 | /* | ||
| 822 | * We used to delete the transport from whichever list | ||
| 823 | * it's sk_xprt.xpt_ready node was on, but we don't actually | ||
| 824 | * need to. This is because the only time we're called | ||
| 825 | * while still attached to a queue, the queue itself | ||
| 826 | * is about to be destroyed (in svc_destroy). | ||
| 827 | */ | ||
| 828 | if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) { | ||
| 829 | BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2); | ||
| 830 | if (test_bit(XPT_TEMP, &xprt->xpt_flags)) | ||
| 831 | serv->sv_tmpcnt--; | ||
| 832 | svc_xprt_put(xprt); | ||
| 833 | } | ||
| 834 | spin_unlock_bh(&serv->sv_lock); | ||
| 835 | } | ||
| 836 | |||
| 837 | void svc_close_xprt(struct svc_xprt *xprt) | ||
| 838 | { | ||
| 839 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
| 840 | if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) | ||
| 841 | /* someone else will have to effect the close */ | ||
| 842 | return; | ||
| 843 | |||
| 844 | svc_xprt_get(xprt); | ||
| 845 | svc_delete_xprt(xprt); | ||
| 846 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
| 847 | svc_xprt_put(xprt); | ||
| 848 | } | ||
| 849 | EXPORT_SYMBOL_GPL(svc_close_xprt); | ||
| 850 | |||
| 851 | void svc_close_all(struct list_head *xprt_list) | ||
| 852 | { | ||
| 853 | struct svc_xprt *xprt; | ||
| 854 | struct svc_xprt *tmp; | ||
| 855 | |||
| 856 | list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { | ||
| 857 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
| 858 | if (test_bit(XPT_BUSY, &xprt->xpt_flags)) { | ||
| 859 | /* Waiting to be processed, but no threads left, | ||
| 860 | * So just remove it from the waiting list | ||
| 861 | */ | ||
| 862 | list_del_init(&xprt->xpt_ready); | ||
| 863 | clear_bit(XPT_BUSY, &xprt->xpt_flags); | ||
| 864 | } | ||
| 865 | svc_close_xprt(xprt); | ||
| 866 | } | ||
| 867 | } | ||
| 868 | |||
| 869 | /* | ||
| 870 | * Handle defer and revisit of requests | ||
| 871 | */ | ||
| 872 | |||
| 873 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | ||
| 874 | { | ||
| 875 | struct svc_deferred_req *dr = | ||
| 876 | container_of(dreq, struct svc_deferred_req, handle); | ||
| 877 | struct svc_xprt *xprt = dr->xprt; | ||
| 878 | |||
| 879 | if (too_many) { | ||
| 880 | svc_xprt_put(xprt); | ||
| 881 | kfree(dr); | ||
| 882 | return; | ||
| 883 | } | ||
| 884 | dprintk("revisit queued\n"); | ||
| 885 | dr->xprt = NULL; | ||
| 886 | spin_lock(&xprt->xpt_lock); | ||
| 887 | list_add(&dr->handle.recent, &xprt->xpt_deferred); | ||
| 888 | spin_unlock(&xprt->xpt_lock); | ||
| 889 | set_bit(XPT_DEFERRED, &xprt->xpt_flags); | ||
| 890 | svc_xprt_enqueue(xprt); | ||
| 891 | svc_xprt_put(xprt); | ||
| 892 | } | ||
| 893 | |||
| 894 | /* | ||
| 895 | * Save the request off for later processing. The request buffer looks | ||
| 896 | * like this: | ||
| 897 | * | ||
| 898 | * <xprt-header><rpc-header><rpc-pagelist><rpc-tail> | ||
| 899 | * | ||
| 900 | * This code can only handle requests that consist of an xprt-header | ||
| 901 | * and rpc-header. | ||
| 902 | */ | ||
| 903 | static struct cache_deferred_req *svc_defer(struct cache_req *req) | ||
| 904 | { | ||
| 905 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | ||
| 906 | struct svc_deferred_req *dr; | ||
| 907 | |||
| 908 | if (rqstp->rq_arg.page_len) | ||
| 909 | return NULL; /* if more than a page, give up FIXME */ | ||
| 910 | if (rqstp->rq_deferred) { | ||
| 911 | dr = rqstp->rq_deferred; | ||
| 912 | rqstp->rq_deferred = NULL; | ||
| 913 | } else { | ||
| 914 | size_t skip; | ||
| 915 | size_t size; | ||
| 916 | /* FIXME maybe discard if size too large */ | ||
| 917 | size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len; | ||
| 918 | dr = kmalloc(size, GFP_KERNEL); | ||
| 919 | if (dr == NULL) | ||
| 920 | return NULL; | ||
| 921 | |||
| 922 | dr->handle.owner = rqstp->rq_server; | ||
| 923 | dr->prot = rqstp->rq_prot; | ||
| 924 | memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); | ||
| 925 | dr->addrlen = rqstp->rq_addrlen; | ||
| 926 | dr->daddr = rqstp->rq_daddr; | ||
| 927 | dr->argslen = rqstp->rq_arg.len >> 2; | ||
| 928 | dr->xprt_hlen = rqstp->rq_xprt_hlen; | ||
| 929 | |||
| 930 | /* back up head to the start of the buffer and copy */ | ||
| 931 | skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | ||
| 932 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip, | ||
| 933 | dr->argslen << 2); | ||
| 934 | } | ||
| 935 | svc_xprt_get(rqstp->rq_xprt); | ||
| 936 | dr->xprt = rqstp->rq_xprt; | ||
| 937 | |||
| 938 | dr->handle.revisit = svc_revisit; | ||
| 939 | return &dr->handle; | ||
| 940 | } | ||
| 941 | |||
| 942 | /* | ||
| 943 | * recv data from a deferred request into an active one | ||
| 944 | */ | ||
| 945 | static int svc_deferred_recv(struct svc_rqst *rqstp) | ||
| 946 | { | ||
| 947 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
| 948 | |||
| 949 | /* setup iov_base past transport header */ | ||
| 950 | rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2); | ||
| 951 | /* The iov_len does not include the transport header bytes */ | ||
| 952 | rqstp->rq_arg.head[0].iov_len = (dr->argslen<<2) - dr->xprt_hlen; | ||
| 953 | rqstp->rq_arg.page_len = 0; | ||
| 954 | /* The rq_arg.len includes the transport header bytes */ | ||
| 955 | rqstp->rq_arg.len = dr->argslen<<2; | ||
| 956 | rqstp->rq_prot = dr->prot; | ||
| 957 | memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); | ||
| 958 | rqstp->rq_addrlen = dr->addrlen; | ||
| 959 | /* Save off transport header len in case we get deferred again */ | ||
| 960 | rqstp->rq_xprt_hlen = dr->xprt_hlen; | ||
| 961 | rqstp->rq_daddr = dr->daddr; | ||
| 962 | rqstp->rq_respages = rqstp->rq_pages; | ||
| 963 | return (dr->argslen<<2) - dr->xprt_hlen; | ||
| 964 | } | ||
| 965 | |||
| 966 | |||
| 967 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) | ||
| 968 | { | ||
| 969 | struct svc_deferred_req *dr = NULL; | ||
| 970 | |||
| 971 | if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags)) | ||
| 972 | return NULL; | ||
| 973 | spin_lock(&xprt->xpt_lock); | ||
| 974 | clear_bit(XPT_DEFERRED, &xprt->xpt_flags); | ||
| 975 | if (!list_empty(&xprt->xpt_deferred)) { | ||
| 976 | dr = list_entry(xprt->xpt_deferred.next, | ||
| 977 | struct svc_deferred_req, | ||
| 978 | handle.recent); | ||
| 979 | list_del_init(&dr->handle.recent); | ||
| 980 | set_bit(XPT_DEFERRED, &xprt->xpt_flags); | ||
| 981 | } | ||
| 982 | spin_unlock(&xprt->xpt_lock); | ||
| 983 | return dr; | ||
| 984 | } | ||
| 985 | |||
| 986 | /* | ||
| 987 | * Return the transport instance pointer for the endpoint accepting | ||
| 988 | * connections/peer traffic from the specified transport class, | ||
| 989 | * address family and port. | ||
| 990 | * | ||
| 991 | * Specifying 0 for the address family or port is effectively a | ||
| 992 | * wild-card, and will result in matching the first transport in the | ||
| 993 | * service's list that has a matching class name. | ||
| 994 | */ | ||
| 995 | struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, | ||
| 996 | int af, int port) | ||
| 997 | { | ||
| 998 | struct svc_xprt *xprt; | ||
| 999 | struct svc_xprt *found = NULL; | ||
| 1000 | |||
| 1001 | /* Sanity check the args */ | ||
| 1002 | if (!serv || !xcl_name) | ||
| 1003 | return found; | ||
| 1004 | |||
| 1005 | spin_lock_bh(&serv->sv_lock); | ||
| 1006 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { | ||
| 1007 | if (strcmp(xprt->xpt_class->xcl_name, xcl_name)) | ||
| 1008 | continue; | ||
| 1009 | if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) | ||
| 1010 | continue; | ||
| 1011 | if (port && port != svc_xprt_local_port(xprt)) | ||
| 1012 | continue; | ||
| 1013 | found = xprt; | ||
| 1014 | svc_xprt_get(xprt); | ||
| 1015 | break; | ||
| 1016 | } | ||
| 1017 | spin_unlock_bh(&serv->sv_lock); | ||
| 1018 | return found; | ||
| 1019 | } | ||
| 1020 | EXPORT_SYMBOL_GPL(svc_find_xprt); | ||
| 1021 | |||
| 1022 | /* | ||
| 1023 | * Format a buffer with a list of the active transports. A zero for | ||
| 1024 | * the buflen parameter disables target buffer overflow checking. | ||
| 1025 | */ | ||
| 1026 | int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) | ||
| 1027 | { | ||
| 1028 | struct svc_xprt *xprt; | ||
| 1029 | char xprt_str[64]; | ||
| 1030 | int totlen = 0; | ||
| 1031 | int len; | ||
| 1032 | |||
| 1033 | /* Sanity check args */ | ||
| 1034 | if (!serv) | ||
| 1035 | return 0; | ||
| 1036 | |||
| 1037 | spin_lock_bh(&serv->sv_lock); | ||
| 1038 | list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { | ||
| 1039 | len = snprintf(xprt_str, sizeof(xprt_str), | ||
| 1040 | "%s %d\n", xprt->xpt_class->xcl_name, | ||
| 1041 | svc_xprt_local_port(xprt)); | ||
| 1042 | /* If the string was truncated, replace with error string */ | ||
| 1043 | if (len >= sizeof(xprt_str)) | ||
| 1044 | strcpy(xprt_str, "name-too-long\n"); | ||
| 1045 | /* Don't overflow buffer */ | ||
| 1046 | len = strlen(xprt_str); | ||
| 1047 | if (buflen && (len + totlen >= buflen)) | ||
| 1048 | break; | ||
| 1049 | strcpy(buf+totlen, xprt_str); | ||
| 1050 | totlen += len; | ||
| 1051 | } | ||
| 1052 | spin_unlock_bh(&serv->sv_lock); | ||
| 1053 | return totlen; | ||
| 1054 | } | ||
| 1055 | EXPORT_SYMBOL_GPL(svc_xprt_names); | ||
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index af7c5f05c6e1..8a73cbb16052 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c | |||
| @@ -57,11 +57,13 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) | |||
| 57 | rqstp->rq_authop = aops; | 57 | rqstp->rq_authop = aops; |
| 58 | return aops->accept(rqstp, authp); | 58 | return aops->accept(rqstp, authp); |
| 59 | } | 59 | } |
| 60 | EXPORT_SYMBOL(svc_authenticate); | ||
| 60 | 61 | ||
| 61 | int svc_set_client(struct svc_rqst *rqstp) | 62 | int svc_set_client(struct svc_rqst *rqstp) |
| 62 | { | 63 | { |
| 63 | return rqstp->rq_authop->set_client(rqstp); | 64 | return rqstp->rq_authop->set_client(rqstp); |
| 64 | } | 65 | } |
| 66 | EXPORT_SYMBOL(svc_set_client); | ||
| 65 | 67 | ||
| 66 | /* A request, which was authenticated, has now executed. | 68 | /* A request, which was authenticated, has now executed. |
| 67 | * Time to finalise the credentials and verifier | 69 | * Time to finalise the credentials and verifier |
| @@ -93,6 +95,7 @@ svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops) | |||
| 93 | spin_unlock(&authtab_lock); | 95 | spin_unlock(&authtab_lock); |
| 94 | return rv; | 96 | return rv; |
| 95 | } | 97 | } |
| 98 | EXPORT_SYMBOL(svc_auth_register); | ||
| 96 | 99 | ||
| 97 | void | 100 | void |
| 98 | svc_auth_unregister(rpc_authflavor_t flavor) | 101 | svc_auth_unregister(rpc_authflavor_t flavor) |
| @@ -129,6 +132,7 @@ void auth_domain_put(struct auth_domain *dom) | |||
| 129 | spin_unlock(&auth_domain_lock); | 132 | spin_unlock(&auth_domain_lock); |
| 130 | } | 133 | } |
| 131 | } | 134 | } |
| 135 | EXPORT_SYMBOL(auth_domain_put); | ||
| 132 | 136 | ||
| 133 | struct auth_domain * | 137 | struct auth_domain * |
| 134 | auth_domain_lookup(char *name, struct auth_domain *new) | 138 | auth_domain_lookup(char *name, struct auth_domain *new) |
| @@ -153,8 +157,10 @@ auth_domain_lookup(char *name, struct auth_domain *new) | |||
| 153 | spin_unlock(&auth_domain_lock); | 157 | spin_unlock(&auth_domain_lock); |
| 154 | return new; | 158 | return new; |
| 155 | } | 159 | } |
| 160 | EXPORT_SYMBOL(auth_domain_lookup); | ||
| 156 | 161 | ||
| 157 | struct auth_domain *auth_domain_find(char *name) | 162 | struct auth_domain *auth_domain_find(char *name) |
| 158 | { | 163 | { |
| 159 | return auth_domain_lookup(name, NULL); | 164 | return auth_domain_lookup(name, NULL); |
| 160 | } | 165 | } |
| 166 | EXPORT_SYMBOL(auth_domain_find); | ||
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 411479411b21..3c64051e4555 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c | |||
| @@ -63,6 +63,7 @@ struct auth_domain *unix_domain_find(char *name) | |||
| 63 | rv = auth_domain_lookup(name, &new->h); | 63 | rv = auth_domain_lookup(name, &new->h); |
| 64 | } | 64 | } |
| 65 | } | 65 | } |
| 66 | EXPORT_SYMBOL(unix_domain_find); | ||
| 66 | 67 | ||
| 67 | static void svcauth_unix_domain_release(struct auth_domain *dom) | 68 | static void svcauth_unix_domain_release(struct auth_domain *dom) |
| 68 | { | 69 | { |
| @@ -340,6 +341,7 @@ int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom) | |||
| 340 | else | 341 | else |
| 341 | return -ENOMEM; | 342 | return -ENOMEM; |
| 342 | } | 343 | } |
| 344 | EXPORT_SYMBOL(auth_unix_add_addr); | ||
| 343 | 345 | ||
| 344 | int auth_unix_forget_old(struct auth_domain *dom) | 346 | int auth_unix_forget_old(struct auth_domain *dom) |
| 345 | { | 347 | { |
| @@ -351,6 +353,7 @@ int auth_unix_forget_old(struct auth_domain *dom) | |||
| 351 | udom->addr_changes++; | 353 | udom->addr_changes++; |
| 352 | return 0; | 354 | return 0; |
| 353 | } | 355 | } |
| 356 | EXPORT_SYMBOL(auth_unix_forget_old); | ||
| 354 | 357 | ||
| 355 | struct auth_domain *auth_unix_lookup(struct in_addr addr) | 358 | struct auth_domain *auth_unix_lookup(struct in_addr addr) |
| 356 | { | 359 | { |
| @@ -375,50 +378,56 @@ struct auth_domain *auth_unix_lookup(struct in_addr addr) | |||
| 375 | cache_put(&ipm->h, &ip_map_cache); | 378 | cache_put(&ipm->h, &ip_map_cache); |
| 376 | return rv; | 379 | return rv; |
| 377 | } | 380 | } |
| 381 | EXPORT_SYMBOL(auth_unix_lookup); | ||
| 378 | 382 | ||
| 379 | void svcauth_unix_purge(void) | 383 | void svcauth_unix_purge(void) |
| 380 | { | 384 | { |
| 381 | cache_purge(&ip_map_cache); | 385 | cache_purge(&ip_map_cache); |
| 382 | } | 386 | } |
| 387 | EXPORT_SYMBOL(svcauth_unix_purge); | ||
| 383 | 388 | ||
| 384 | static inline struct ip_map * | 389 | static inline struct ip_map * |
| 385 | ip_map_cached_get(struct svc_rqst *rqstp) | 390 | ip_map_cached_get(struct svc_rqst *rqstp) |
| 386 | { | 391 | { |
| 387 | struct ip_map *ipm; | 392 | struct ip_map *ipm = NULL; |
| 388 | struct svc_sock *svsk = rqstp->rq_sock; | 393 | struct svc_xprt *xprt = rqstp->rq_xprt; |
| 389 | spin_lock(&svsk->sk_lock); | 394 | |
| 390 | ipm = svsk->sk_info_authunix; | 395 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { |
| 391 | if (ipm != NULL) { | 396 | spin_lock(&xprt->xpt_lock); |
| 392 | if (!cache_valid(&ipm->h)) { | 397 | ipm = xprt->xpt_auth_cache; |
| 393 | /* | 398 | if (ipm != NULL) { |
| 394 | * The entry has been invalidated since it was | 399 | if (!cache_valid(&ipm->h)) { |
| 395 | * remembered, e.g. by a second mount from the | 400 | /* |
| 396 | * same IP address. | 401 | * The entry has been invalidated since it was |
| 397 | */ | 402 | * remembered, e.g. by a second mount from the |
| 398 | svsk->sk_info_authunix = NULL; | 403 | * same IP address. |
| 399 | spin_unlock(&svsk->sk_lock); | 404 | */ |
| 400 | cache_put(&ipm->h, &ip_map_cache); | 405 | xprt->xpt_auth_cache = NULL; |
| 401 | return NULL; | 406 | spin_unlock(&xprt->xpt_lock); |
| 407 | cache_put(&ipm->h, &ip_map_cache); | ||
| 408 | return NULL; | ||
| 409 | } | ||
| 410 | cache_get(&ipm->h); | ||
| 402 | } | 411 | } |
| 403 | cache_get(&ipm->h); | 412 | spin_unlock(&xprt->xpt_lock); |
| 404 | } | 413 | } |
| 405 | spin_unlock(&svsk->sk_lock); | ||
| 406 | return ipm; | 414 | return ipm; |
| 407 | } | 415 | } |
| 408 | 416 | ||
| 409 | static inline void | 417 | static inline void |
| 410 | ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) | 418 | ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm) |
| 411 | { | 419 | { |
| 412 | struct svc_sock *svsk = rqstp->rq_sock; | 420 | struct svc_xprt *xprt = rqstp->rq_xprt; |
| 413 | 421 | ||
| 414 | spin_lock(&svsk->sk_lock); | 422 | if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags)) { |
| 415 | if (svsk->sk_sock->type == SOCK_STREAM && | 423 | spin_lock(&xprt->xpt_lock); |
| 416 | svsk->sk_info_authunix == NULL) { | 424 | if (xprt->xpt_auth_cache == NULL) { |
| 417 | /* newly cached, keep the reference */ | 425 | /* newly cached, keep the reference */ |
| 418 | svsk->sk_info_authunix = ipm; | 426 | xprt->xpt_auth_cache = ipm; |
| 419 | ipm = NULL; | 427 | ipm = NULL; |
| 428 | } | ||
| 429 | spin_unlock(&xprt->xpt_lock); | ||
| 420 | } | 430 | } |
| 421 | spin_unlock(&svsk->sk_lock); | ||
| 422 | if (ipm) | 431 | if (ipm) |
| 423 | cache_put(&ipm->h, &ip_map_cache); | 432 | cache_put(&ipm->h, &ip_map_cache); |
| 424 | } | 433 | } |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c75bffeb89eb..1d3e5fcc2cc4 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
| @@ -5,7 +5,7 @@ | |||
| 5 | * | 5 | * |
| 6 | * The server scheduling algorithm does not always distribute the load | 6 | * The server scheduling algorithm does not always distribute the load |
| 7 | * evenly when servicing a single client. May need to modify the | 7 | * evenly when servicing a single client. May need to modify the |
| 8 | * svc_sock_enqueue procedure... | 8 | * svc_xprt_enqueue procedure... |
| 9 | * | 9 | * |
| 10 | * TCP support is largely untested and may be a little slow. The problem | 10 | * TCP support is largely untested and may be a little slow. The problem |
| 11 | * is that we currently do two separate recvfrom's, one for the 4-byte | 11 | * is that we currently do two separate recvfrom's, one for the 4-byte |
| @@ -48,72 +48,40 @@ | |||
| 48 | #include <linux/sunrpc/svcsock.h> | 48 | #include <linux/sunrpc/svcsock.h> |
| 49 | #include <linux/sunrpc/stats.h> | 49 | #include <linux/sunrpc/stats.h> |
| 50 | 50 | ||
| 51 | /* SMP locking strategy: | 51 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT |
| 52 | * | ||
| 53 | * svc_pool->sp_lock protects most of the fields of that pool. | ||
| 54 | * svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt. | ||
| 55 | * when both need to be taken (rare), svc_serv->sv_lock is first. | ||
| 56 | * BKL protects svc_serv->sv_nrthread. | ||
| 57 | * svc_sock->sk_lock protects the svc_sock->sk_deferred list | ||
| 58 | * and the ->sk_info_authunix cache. | ||
| 59 | * svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply. | ||
| 60 | * | ||
| 61 | * Some flags can be set to certain values at any time | ||
| 62 | * providing that certain rules are followed: | ||
| 63 | * | ||
| 64 | * SK_CONN, SK_DATA, can be set or cleared at any time. | ||
| 65 | * after a set, svc_sock_enqueue must be called. | ||
| 66 | * after a clear, the socket must be read/accepted | ||
| 67 | * if this succeeds, it must be set again. | ||
| 68 | * SK_CLOSE can set at any time. It is never cleared. | ||
| 69 | * sk_inuse contains a bias of '1' until SK_DEAD is set. | ||
| 70 | * so when sk_inuse hits zero, we know the socket is dead | ||
| 71 | * and no-one is using it. | ||
| 72 | * SK_DEAD can only be set while SK_BUSY is held which ensures | ||
| 73 | * no other thread will be using the socket or will try to | ||
| 74 | * set SK_DEAD. | ||
| 75 | * | ||
| 76 | */ | ||
| 77 | |||
| 78 | #define RPCDBG_FACILITY RPCDBG_SVCSOCK | ||
| 79 | 52 | ||
| 80 | 53 | ||
| 81 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, | 54 | static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, |
| 82 | int *errp, int flags); | 55 | int *errp, int flags); |
| 83 | static void svc_delete_socket(struct svc_sock *svsk); | ||
| 84 | static void svc_udp_data_ready(struct sock *, int); | 56 | static void svc_udp_data_ready(struct sock *, int); |
| 85 | static int svc_udp_recvfrom(struct svc_rqst *); | 57 | static int svc_udp_recvfrom(struct svc_rqst *); |
| 86 | static int svc_udp_sendto(struct svc_rqst *); | 58 | static int svc_udp_sendto(struct svc_rqst *); |
| 87 | static void svc_close_socket(struct svc_sock *svsk); | 59 | static void svc_sock_detach(struct svc_xprt *); |
| 88 | 60 | static void svc_sock_free(struct svc_xprt *); | |
| 89 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); | ||
| 90 | static int svc_deferred_recv(struct svc_rqst *rqstp); | ||
| 91 | static struct cache_deferred_req *svc_defer(struct cache_req *req); | ||
| 92 | |||
| 93 | /* apparently the "standard" is that clients close | ||
| 94 | * idle connections after 5 minutes, servers after | ||
| 95 | * 6 minutes | ||
| 96 | * http://www.connectathon.org/talks96/nfstcp.pdf | ||
| 97 | */ | ||
| 98 | static int svc_conn_age_period = 6*60; | ||
| 99 | 61 | ||
| 62 | static struct svc_xprt *svc_create_socket(struct svc_serv *, int, | ||
| 63 | struct sockaddr *, int, int); | ||
| 100 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 64 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 101 | static struct lock_class_key svc_key[2]; | 65 | static struct lock_class_key svc_key[2]; |
| 102 | static struct lock_class_key svc_slock_key[2]; | 66 | static struct lock_class_key svc_slock_key[2]; |
| 103 | 67 | ||
| 104 | static inline void svc_reclassify_socket(struct socket *sock) | 68 | static void svc_reclassify_socket(struct socket *sock) |
| 105 | { | 69 | { |
| 106 | struct sock *sk = sock->sk; | 70 | struct sock *sk = sock->sk; |
| 107 | BUG_ON(sock_owned_by_user(sk)); | 71 | BUG_ON(sock_owned_by_user(sk)); |
| 108 | switch (sk->sk_family) { | 72 | switch (sk->sk_family) { |
| 109 | case AF_INET: | 73 | case AF_INET: |
| 110 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", | 74 | sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", |
| 111 | &svc_slock_key[0], "sk_lock-AF_INET-NFSD", &svc_key[0]); | 75 | &svc_slock_key[0], |
| 76 | "sk_xprt.xpt_lock-AF_INET-NFSD", | ||
| 77 | &svc_key[0]); | ||
| 112 | break; | 78 | break; |
| 113 | 79 | ||
| 114 | case AF_INET6: | 80 | case AF_INET6: |
| 115 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", | 81 | sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", |
| 116 | &svc_slock_key[1], "sk_lock-AF_INET6-NFSD", &svc_key[1]); | 82 | &svc_slock_key[1], |
| 83 | "sk_xprt.xpt_lock-AF_INET6-NFSD", | ||
| 84 | &svc_key[1]); | ||
| 117 | break; | 85 | break; |
| 118 | 86 | ||
| 119 | default: | 87 | default: |
| @@ -121,81 +89,26 @@ static inline void svc_reclassify_socket(struct socket *sock) | |||
| 121 | } | 89 | } |
| 122 | } | 90 | } |
| 123 | #else | 91 | #else |
| 124 | static inline void svc_reclassify_socket(struct socket *sock) | 92 | static void svc_reclassify_socket(struct socket *sock) |
| 125 | { | 93 | { |
| 126 | } | 94 | } |
| 127 | #endif | 95 | #endif |
| 128 | 96 | ||
| 129 | static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len) | ||
| 130 | { | ||
| 131 | switch (addr->sa_family) { | ||
| 132 | case AF_INET: | ||
| 133 | snprintf(buf, len, "%u.%u.%u.%u, port=%u", | ||
| 134 | NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), | ||
| 135 | ntohs(((struct sockaddr_in *) addr)->sin_port)); | ||
| 136 | break; | ||
| 137 | |||
| 138 | case AF_INET6: | ||
| 139 | snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", | ||
| 140 | NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), | ||
| 141 | ntohs(((struct sockaddr_in6 *) addr)->sin6_port)); | ||
| 142 | break; | ||
| 143 | |||
| 144 | default: | ||
| 145 | snprintf(buf, len, "unknown address type: %d", addr->sa_family); | ||
| 146 | break; | ||
| 147 | } | ||
| 148 | return buf; | ||
| 149 | } | ||
| 150 | |||
| 151 | /** | ||
| 152 | * svc_print_addr - Format rq_addr field for printing | ||
| 153 | * @rqstp: svc_rqst struct containing address to print | ||
| 154 | * @buf: target buffer for formatted address | ||
| 155 | * @len: length of target buffer | ||
| 156 | * | ||
| 157 | */ | ||
| 158 | char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) | ||
| 159 | { | ||
| 160 | return __svc_print_addr(svc_addr(rqstp), buf, len); | ||
| 161 | } | ||
| 162 | EXPORT_SYMBOL_GPL(svc_print_addr); | ||
| 163 | |||
| 164 | /* | ||
| 165 | * Queue up an idle server thread. Must have pool->sp_lock held. | ||
| 166 | * Note: this is really a stack rather than a queue, so that we only | ||
| 167 | * use as many different threads as we need, and the rest don't pollute | ||
| 168 | * the cache. | ||
| 169 | */ | ||
| 170 | static inline void | ||
| 171 | svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
| 172 | { | ||
| 173 | list_add(&rqstp->rq_list, &pool->sp_threads); | ||
| 174 | } | ||
| 175 | |||
| 176 | /* | ||
| 177 | * Dequeue an nfsd thread. Must have pool->sp_lock held. | ||
| 178 | */ | ||
| 179 | static inline void | ||
| 180 | svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp) | ||
| 181 | { | ||
| 182 | list_del(&rqstp->rq_list); | ||
| 183 | } | ||
| 184 | |||
| 185 | /* | 97 | /* |
| 186 | * Release an skbuff after use | 98 | * Release an skbuff after use |
| 187 | */ | 99 | */ |
| 188 | static inline void | 100 | static void svc_release_skb(struct svc_rqst *rqstp) |
| 189 | svc_release_skb(struct svc_rqst *rqstp) | ||
| 190 | { | 101 | { |
| 191 | struct sk_buff *skb = rqstp->rq_skbuff; | 102 | struct sk_buff *skb = rqstp->rq_xprt_ctxt; |
| 192 | struct svc_deferred_req *dr = rqstp->rq_deferred; | 103 | struct svc_deferred_req *dr = rqstp->rq_deferred; |
| 193 | 104 | ||
| 194 | if (skb) { | 105 | if (skb) { |
| 195 | rqstp->rq_skbuff = NULL; | 106 | struct svc_sock *svsk = |
| 107 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
| 108 | rqstp->rq_xprt_ctxt = NULL; | ||
| 196 | 109 | ||
| 197 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); | 110 | dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); |
| 198 | skb_free_datagram(rqstp->rq_sock->sk_sk, skb); | 111 | skb_free_datagram(svsk->sk_sk, skb); |
| 199 | } | 112 | } |
| 200 | if (dr) { | 113 | if (dr) { |
| 201 | rqstp->rq_deferred = NULL; | 114 | rqstp->rq_deferred = NULL; |
| @@ -203,253 +116,6 @@ svc_release_skb(struct svc_rqst *rqstp) | |||
| 203 | } | 116 | } |
| 204 | } | 117 | } |
| 205 | 118 | ||
| 206 | /* | ||
| 207 | * Any space to write? | ||
| 208 | */ | ||
| 209 | static inline unsigned long | ||
| 210 | svc_sock_wspace(struct svc_sock *svsk) | ||
| 211 | { | ||
| 212 | int wspace; | ||
| 213 | |||
| 214 | if (svsk->sk_sock->type == SOCK_STREAM) | ||
| 215 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
| 216 | else | ||
| 217 | wspace = sock_wspace(svsk->sk_sk); | ||
| 218 | |||
| 219 | return wspace; | ||
| 220 | } | ||
| 221 | |||
| 222 | /* | ||
| 223 | * Queue up a socket with data pending. If there are idle nfsd | ||
| 224 | * processes, wake 'em up. | ||
| 225 | * | ||
| 226 | */ | ||
| 227 | static void | ||
| 228 | svc_sock_enqueue(struct svc_sock *svsk) | ||
| 229 | { | ||
| 230 | struct svc_serv *serv = svsk->sk_server; | ||
| 231 | struct svc_pool *pool; | ||
| 232 | struct svc_rqst *rqstp; | ||
| 233 | int cpu; | ||
| 234 | |||
| 235 | if (!(svsk->sk_flags & | ||
| 236 | ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) )) | ||
| 237 | return; | ||
| 238 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
| 239 | return; | ||
| 240 | |||
| 241 | cpu = get_cpu(); | ||
| 242 | pool = svc_pool_for_cpu(svsk->sk_server, cpu); | ||
| 243 | put_cpu(); | ||
| 244 | |||
| 245 | spin_lock_bh(&pool->sp_lock); | ||
| 246 | |||
| 247 | if (!list_empty(&pool->sp_threads) && | ||
| 248 | !list_empty(&pool->sp_sockets)) | ||
| 249 | printk(KERN_ERR | ||
| 250 | "svc_sock_enqueue: threads and sockets both waiting??\n"); | ||
| 251 | |||
| 252 | if (test_bit(SK_DEAD, &svsk->sk_flags)) { | ||
| 253 | /* Don't enqueue dead sockets */ | ||
| 254 | dprintk("svc: socket %p is dead, not enqueued\n", svsk->sk_sk); | ||
| 255 | goto out_unlock; | ||
| 256 | } | ||
| 257 | |||
| 258 | /* Mark socket as busy. It will remain in this state until the | ||
| 259 | * server has processed all pending data and put the socket back | ||
| 260 | * on the idle list. We update SK_BUSY atomically because | ||
| 261 | * it also guards against trying to enqueue the svc_sock twice. | ||
| 262 | */ | ||
| 263 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) { | ||
| 264 | /* Don't enqueue socket while already enqueued */ | ||
| 265 | dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk); | ||
| 266 | goto out_unlock; | ||
| 267 | } | ||
| 268 | BUG_ON(svsk->sk_pool != NULL); | ||
| 269 | svsk->sk_pool = pool; | ||
| 270 | |||
| 271 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
| 272 | if (((atomic_read(&svsk->sk_reserved) + serv->sv_max_mesg)*2 | ||
| 273 | > svc_sock_wspace(svsk)) | ||
| 274 | && !test_bit(SK_CLOSE, &svsk->sk_flags) | ||
| 275 | && !test_bit(SK_CONN, &svsk->sk_flags)) { | ||
| 276 | /* Don't enqueue while not enough space for reply */ | ||
| 277 | dprintk("svc: socket %p no space, %d*2 > %ld, not enqueued\n", | ||
| 278 | svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_max_mesg, | ||
| 279 | svc_sock_wspace(svsk)); | ||
| 280 | svsk->sk_pool = NULL; | ||
| 281 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
| 282 | goto out_unlock; | ||
| 283 | } | ||
| 284 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
| 285 | |||
| 286 | |||
| 287 | if (!list_empty(&pool->sp_threads)) { | ||
| 288 | rqstp = list_entry(pool->sp_threads.next, | ||
| 289 | struct svc_rqst, | ||
| 290 | rq_list); | ||
| 291 | dprintk("svc: socket %p served by daemon %p\n", | ||
| 292 | svsk->sk_sk, rqstp); | ||
| 293 | svc_thread_dequeue(pool, rqstp); | ||
| 294 | if (rqstp->rq_sock) | ||
| 295 | printk(KERN_ERR | ||
| 296 | "svc_sock_enqueue: server %p, rq_sock=%p!\n", | ||
| 297 | rqstp, rqstp->rq_sock); | ||
| 298 | rqstp->rq_sock = svsk; | ||
| 299 | atomic_inc(&svsk->sk_inuse); | ||
| 300 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
| 301 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | ||
| 302 | BUG_ON(svsk->sk_pool != pool); | ||
| 303 | wake_up(&rqstp->rq_wait); | ||
| 304 | } else { | ||
| 305 | dprintk("svc: socket %p put into queue\n", svsk->sk_sk); | ||
| 306 | list_add_tail(&svsk->sk_ready, &pool->sp_sockets); | ||
| 307 | BUG_ON(svsk->sk_pool != pool); | ||
| 308 | } | ||
| 309 | |||
| 310 | out_unlock: | ||
| 311 | spin_unlock_bh(&pool->sp_lock); | ||
| 312 | } | ||
| 313 | |||
| 314 | /* | ||
| 315 | * Dequeue the first socket. Must be called with the pool->sp_lock held. | ||
| 316 | */ | ||
| 317 | static inline struct svc_sock * | ||
| 318 | svc_sock_dequeue(struct svc_pool *pool) | ||
| 319 | { | ||
| 320 | struct svc_sock *svsk; | ||
| 321 | |||
| 322 | if (list_empty(&pool->sp_sockets)) | ||
| 323 | return NULL; | ||
| 324 | |||
| 325 | svsk = list_entry(pool->sp_sockets.next, | ||
| 326 | struct svc_sock, sk_ready); | ||
| 327 | list_del_init(&svsk->sk_ready); | ||
| 328 | |||
| 329 | dprintk("svc: socket %p dequeued, inuse=%d\n", | ||
| 330 | svsk->sk_sk, atomic_read(&svsk->sk_inuse)); | ||
| 331 | |||
| 332 | return svsk; | ||
| 333 | } | ||
| 334 | |||
| 335 | /* | ||
| 336 | * Having read something from a socket, check whether it | ||
| 337 | * needs to be re-enqueued. | ||
| 338 | * Note: SK_DATA only gets cleared when a read-attempt finds | ||
| 339 | * no (or insufficient) data. | ||
| 340 | */ | ||
| 341 | static inline void | ||
| 342 | svc_sock_received(struct svc_sock *svsk) | ||
| 343 | { | ||
| 344 | svsk->sk_pool = NULL; | ||
| 345 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
| 346 | svc_sock_enqueue(svsk); | ||
| 347 | } | ||
| 348 | |||
| 349 | |||
| 350 | /** | ||
| 351 | * svc_reserve - change the space reserved for the reply to a request. | ||
| 352 | * @rqstp: The request in question | ||
| 353 | * @space: new max space to reserve | ||
| 354 | * | ||
| 355 | * Each request reserves some space on the output queue of the socket | ||
| 356 | * to make sure the reply fits. This function reduces that reserved | ||
| 357 | * space to be the amount of space used already, plus @space. | ||
| 358 | * | ||
| 359 | */ | ||
| 360 | void svc_reserve(struct svc_rqst *rqstp, int space) | ||
| 361 | { | ||
| 362 | space += rqstp->rq_res.head[0].iov_len; | ||
| 363 | |||
| 364 | if (space < rqstp->rq_reserved) { | ||
| 365 | struct svc_sock *svsk = rqstp->rq_sock; | ||
| 366 | atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved); | ||
| 367 | rqstp->rq_reserved = space; | ||
| 368 | |||
| 369 | svc_sock_enqueue(svsk); | ||
| 370 | } | ||
| 371 | } | ||
| 372 | |||
| 373 | /* | ||
| 374 | * Release a socket after use. | ||
| 375 | */ | ||
| 376 | static inline void | ||
| 377 | svc_sock_put(struct svc_sock *svsk) | ||
| 378 | { | ||
| 379 | if (atomic_dec_and_test(&svsk->sk_inuse)) { | ||
| 380 | BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); | ||
| 381 | |||
| 382 | dprintk("svc: releasing dead socket\n"); | ||
| 383 | if (svsk->sk_sock->file) | ||
| 384 | sockfd_put(svsk->sk_sock); | ||
| 385 | else | ||
| 386 | sock_release(svsk->sk_sock); | ||
| 387 | if (svsk->sk_info_authunix != NULL) | ||
| 388 | svcauth_unix_info_release(svsk->sk_info_authunix); | ||
| 389 | kfree(svsk); | ||
| 390 | } | ||
| 391 | } | ||
| 392 | |||
| 393 | static void | ||
| 394 | svc_sock_release(struct svc_rqst *rqstp) | ||
| 395 | { | ||
| 396 | struct svc_sock *svsk = rqstp->rq_sock; | ||
| 397 | |||
| 398 | svc_release_skb(rqstp); | ||
| 399 | |||
| 400 | svc_free_res_pages(rqstp); | ||
| 401 | rqstp->rq_res.page_len = 0; | ||
| 402 | rqstp->rq_res.page_base = 0; | ||
| 403 | |||
| 404 | |||
| 405 | /* Reset response buffer and release | ||
| 406 | * the reservation. | ||
| 407 | * But first, check that enough space was reserved | ||
| 408 | * for the reply, otherwise we have a bug! | ||
| 409 | */ | ||
| 410 | if ((rqstp->rq_res.len) > rqstp->rq_reserved) | ||
| 411 | printk(KERN_ERR "RPC request reserved %d but used %d\n", | ||
| 412 | rqstp->rq_reserved, | ||
| 413 | rqstp->rq_res.len); | ||
| 414 | |||
| 415 | rqstp->rq_res.head[0].iov_len = 0; | ||
| 416 | svc_reserve(rqstp, 0); | ||
| 417 | rqstp->rq_sock = NULL; | ||
| 418 | |||
| 419 | svc_sock_put(svsk); | ||
| 420 | } | ||
| 421 | |||
| 422 | /* | ||
| 423 | * External function to wake up a server waiting for data | ||
| 424 | * This really only makes sense for services like lockd | ||
| 425 | * which have exactly one thread anyway. | ||
| 426 | */ | ||
| 427 | void | ||
| 428 | svc_wake_up(struct svc_serv *serv) | ||
| 429 | { | ||
| 430 | struct svc_rqst *rqstp; | ||
| 431 | unsigned int i; | ||
| 432 | struct svc_pool *pool; | ||
| 433 | |||
| 434 | for (i = 0; i < serv->sv_nrpools; i++) { | ||
| 435 | pool = &serv->sv_pools[i]; | ||
| 436 | |||
| 437 | spin_lock_bh(&pool->sp_lock); | ||
| 438 | if (!list_empty(&pool->sp_threads)) { | ||
| 439 | rqstp = list_entry(pool->sp_threads.next, | ||
| 440 | struct svc_rqst, | ||
| 441 | rq_list); | ||
| 442 | dprintk("svc: daemon %p woken up.\n", rqstp); | ||
| 443 | /* | ||
| 444 | svc_thread_dequeue(pool, rqstp); | ||
| 445 | rqstp->rq_sock = NULL; | ||
| 446 | */ | ||
| 447 | wake_up(&rqstp->rq_wait); | ||
| 448 | } | ||
| 449 | spin_unlock_bh(&pool->sp_lock); | ||
| 450 | } | ||
| 451 | } | ||
| 452 | |||
| 453 | union svc_pktinfo_u { | 119 | union svc_pktinfo_u { |
| 454 | struct in_pktinfo pkti; | 120 | struct in_pktinfo pkti; |
| 455 | struct in6_pktinfo pkti6; | 121 | struct in6_pktinfo pkti6; |
| @@ -459,7 +125,9 @@ union svc_pktinfo_u { | |||
| 459 | 125 | ||
| 460 | static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) | 126 | static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) |
| 461 | { | 127 | { |
| 462 | switch (rqstp->rq_sock->sk_sk->sk_family) { | 128 | struct svc_sock *svsk = |
| 129 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
| 130 | switch (svsk->sk_sk->sk_family) { | ||
| 463 | case AF_INET: { | 131 | case AF_INET: { |
| 464 | struct in_pktinfo *pki = CMSG_DATA(cmh); | 132 | struct in_pktinfo *pki = CMSG_DATA(cmh); |
| 465 | 133 | ||
| @@ -489,10 +157,10 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) | |||
| 489 | /* | 157 | /* |
| 490 | * Generic sendto routine | 158 | * Generic sendto routine |
| 491 | */ | 159 | */ |
| 492 | static int | 160 | static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) |
| 493 | svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | ||
| 494 | { | 161 | { |
| 495 | struct svc_sock *svsk = rqstp->rq_sock; | 162 | struct svc_sock *svsk = |
| 163 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
| 496 | struct socket *sock = svsk->sk_sock; | 164 | struct socket *sock = svsk->sk_sock; |
| 497 | int slen; | 165 | int slen; |
| 498 | union { | 166 | union { |
| @@ -565,7 +233,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) | |||
| 565 | } | 233 | } |
| 566 | out: | 234 | out: |
| 567 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", | 235 | dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", |
| 568 | rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, | 236 | svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, |
| 569 | xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); | 237 | xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); |
| 570 | 238 | ||
| 571 | return len; | 239 | return len; |
| @@ -602,7 +270,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
| 602 | if (!serv) | 270 | if (!serv) |
| 603 | return 0; | 271 | return 0; |
| 604 | spin_lock_bh(&serv->sv_lock); | 272 | spin_lock_bh(&serv->sv_lock); |
| 605 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { | 273 | list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { |
| 606 | int onelen = one_sock_name(buf+len, svsk); | 274 | int onelen = one_sock_name(buf+len, svsk); |
| 607 | if (toclose && strcmp(toclose, buf+len) == 0) | 275 | if (toclose && strcmp(toclose, buf+len) == 0) |
| 608 | closesk = svsk; | 276 | closesk = svsk; |
| @@ -614,7 +282,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) | |||
| 614 | /* Should unregister with portmap, but you cannot | 282 | /* Should unregister with portmap, but you cannot |
| 615 | * unregister just one protocol... | 283 | * unregister just one protocol... |
| 616 | */ | 284 | */ |
| 617 | svc_close_socket(closesk); | 285 | svc_close_xprt(&closesk->sk_xprt); |
| 618 | else if (toclose) | 286 | else if (toclose) |
| 619 | return -ENOENT; | 287 | return -ENOENT; |
| 620 | return len; | 288 | return len; |
| @@ -624,8 +292,7 @@ EXPORT_SYMBOL(svc_sock_names); | |||
| 624 | /* | 292 | /* |
| 625 | * Check input queue length | 293 | * Check input queue length |
| 626 | */ | 294 | */ |
| 627 | static int | 295 | static int svc_recv_available(struct svc_sock *svsk) |
| 628 | svc_recv_available(struct svc_sock *svsk) | ||
| 629 | { | 296 | { |
| 630 | struct socket *sock = svsk->sk_sock; | 297 | struct socket *sock = svsk->sk_sock; |
| 631 | int avail, err; | 298 | int avail, err; |
| @@ -638,48 +305,31 @@ svc_recv_available(struct svc_sock *svsk) | |||
| 638 | /* | 305 | /* |
| 639 | * Generic recvfrom routine. | 306 | * Generic recvfrom routine. |
| 640 | */ | 307 | */ |
| 641 | static int | 308 | static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, |
| 642 | svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) | 309 | int buflen) |
| 643 | { | 310 | { |
| 644 | struct svc_sock *svsk = rqstp->rq_sock; | 311 | struct svc_sock *svsk = |
| 312 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
| 645 | struct msghdr msg = { | 313 | struct msghdr msg = { |
| 646 | .msg_flags = MSG_DONTWAIT, | 314 | .msg_flags = MSG_DONTWAIT, |
| 647 | }; | 315 | }; |
| 648 | struct sockaddr *sin; | ||
| 649 | int len; | 316 | int len; |
| 650 | 317 | ||
| 318 | rqstp->rq_xprt_hlen = 0; | ||
| 319 | |||
| 651 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, | 320 | len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, |
| 652 | msg.msg_flags); | 321 | msg.msg_flags); |
| 653 | 322 | ||
| 654 | /* sock_recvmsg doesn't fill in the name/namelen, so we must.. | ||
| 655 | */ | ||
| 656 | memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen); | ||
| 657 | rqstp->rq_addrlen = svsk->sk_remotelen; | ||
| 658 | |||
| 659 | /* Destination address in request is needed for binding the | ||
| 660 | * source address in RPC callbacks later. | ||
| 661 | */ | ||
| 662 | sin = (struct sockaddr *)&svsk->sk_local; | ||
| 663 | switch (sin->sa_family) { | ||
| 664 | case AF_INET: | ||
| 665 | rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr; | ||
| 666 | break; | ||
| 667 | case AF_INET6: | ||
| 668 | rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr; | ||
| 669 | break; | ||
| 670 | } | ||
| 671 | |||
| 672 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", | 323 | dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", |
| 673 | svsk, iov[0].iov_base, iov[0].iov_len, len); | 324 | svsk, iov[0].iov_base, iov[0].iov_len, len); |
| 674 | |||
| 675 | return len; | 325 | return len; |
| 676 | } | 326 | } |
| 677 | 327 | ||
| 678 | /* | 328 | /* |
| 679 | * Set socket snd and rcv buffer lengths | 329 | * Set socket snd and rcv buffer lengths |
| 680 | */ | 330 | */ |
| 681 | static inline void | 331 | static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, |
| 682 | svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | 332 | unsigned int rcv) |
| 683 | { | 333 | { |
| 684 | #if 0 | 334 | #if 0 |
| 685 | mm_segment_t oldfs; | 335 | mm_segment_t oldfs; |
| @@ -704,16 +354,16 @@ svc_sock_setbufsize(struct socket *sock, unsigned int snd, unsigned int rcv) | |||
| 704 | /* | 354 | /* |
| 705 | * INET callback when data has been received on the socket. | 355 | * INET callback when data has been received on the socket. |
| 706 | */ | 356 | */ |
| 707 | static void | 357 | static void svc_udp_data_ready(struct sock *sk, int count) |
| 708 | svc_udp_data_ready(struct sock *sk, int count) | ||
| 709 | { | 358 | { |
| 710 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 359 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
| 711 | 360 | ||
| 712 | if (svsk) { | 361 | if (svsk) { |
| 713 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", | 362 | dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", |
| 714 | svsk, sk, count, test_bit(SK_BUSY, &svsk->sk_flags)); | 363 | svsk, sk, count, |
| 715 | set_bit(SK_DATA, &svsk->sk_flags); | 364 | test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); |
| 716 | svc_sock_enqueue(svsk); | 365 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
| 366 | svc_xprt_enqueue(&svsk->sk_xprt); | ||
| 717 | } | 367 | } |
| 718 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 368 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
| 719 | wake_up_interruptible(sk->sk_sleep); | 369 | wake_up_interruptible(sk->sk_sleep); |
| @@ -722,15 +372,14 @@ svc_udp_data_ready(struct sock *sk, int count) | |||
| 722 | /* | 372 | /* |
| 723 | * INET callback when space is newly available on the socket. | 373 | * INET callback when space is newly available on the socket. |
| 724 | */ | 374 | */ |
| 725 | static void | 375 | static void svc_write_space(struct sock *sk) |
| 726 | svc_write_space(struct sock *sk) | ||
| 727 | { | 376 | { |
| 728 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); | 377 | struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); |
| 729 | 378 | ||
| 730 | if (svsk) { | 379 | if (svsk) { |
| 731 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", | 380 | dprintk("svc: socket %p(inet %p), write_space busy=%d\n", |
| 732 | svsk, sk, test_bit(SK_BUSY, &svsk->sk_flags)); | 381 | svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); |
| 733 | svc_sock_enqueue(svsk); | 382 | svc_xprt_enqueue(&svsk->sk_xprt); |
| 734 | } | 383 | } |
| 735 | 384 | ||
| 736 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { | 385 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { |
| @@ -740,10 +389,19 @@ svc_write_space(struct sock *sk) | |||
| 740 | } | 389 | } |
| 741 | } | 390 | } |
| 742 | 391 | ||
| 743 | static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, | 392 | /* |
| 744 | struct cmsghdr *cmh) | 393 | * Copy the UDP datagram's destination address to the rqstp structure. |
| 394 | * The 'destination' address in this case is the address to which the | ||
| 395 | * peer sent the datagram, i.e. our local address. For multihomed | ||
| 396 | * hosts, this can change from msg to msg. Note that only the IP | ||
| 397 | * address changes, the port number should remain the same. | ||
| 398 | */ | ||
| 399 | static void svc_udp_get_dest_address(struct svc_rqst *rqstp, | ||
| 400 | struct cmsghdr *cmh) | ||
| 745 | { | 401 | { |
| 746 | switch (rqstp->rq_sock->sk_sk->sk_family) { | 402 | struct svc_sock *svsk = |
| 403 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); | ||
| 404 | switch (svsk->sk_sk->sk_family) { | ||
| 747 | case AF_INET: { | 405 | case AF_INET: { |
| 748 | struct in_pktinfo *pki = CMSG_DATA(cmh); | 406 | struct in_pktinfo *pki = CMSG_DATA(cmh); |
| 749 | rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; | 407 | rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; |
| @@ -760,11 +418,11 @@ static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, | |||
| 760 | /* | 418 | /* |
| 761 | * Receive a datagram from a UDP socket. | 419 | * Receive a datagram from a UDP socket. |
| 762 | */ | 420 | */ |
| 763 | static int | 421 | static int svc_udp_recvfrom(struct svc_rqst *rqstp) |
| 764 | svc_udp_recvfrom(struct svc_rqst *rqstp) | ||
| 765 | { | 422 | { |
| 766 | struct svc_sock *svsk = rqstp->rq_sock; | 423 | struct svc_sock *svsk = |
| 767 | struct svc_serv *serv = svsk->sk_server; | 424 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); |
| 425 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
| 768 | struct sk_buff *skb; | 426 | struct sk_buff *skb; |
| 769 | union { | 427 | union { |
| 770 | struct cmsghdr hdr; | 428 | struct cmsghdr hdr; |
| @@ -779,7 +437,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
| 779 | .msg_flags = MSG_DONTWAIT, | 437 | .msg_flags = MSG_DONTWAIT, |
| 780 | }; | 438 | }; |
| 781 | 439 | ||
| 782 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | 440 | if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) |
| 783 | /* udp sockets need large rcvbuf as all pending | 441 | /* udp sockets need large rcvbuf as all pending |
| 784 | * requests are still in that buffer. sndbuf must | 442 | * requests are still in that buffer. sndbuf must |
| 785 | * also be large enough that there is enough space | 443 | * also be large enough that there is enough space |
| @@ -792,17 +450,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
| 792 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, | 450 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, |
| 793 | (serv->sv_nrthreads+3) * serv->sv_max_mesg); | 451 | (serv->sv_nrthreads+3) * serv->sv_max_mesg); |
| 794 | 452 | ||
| 795 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 453 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
| 796 | svc_sock_received(svsk); | ||
| 797 | return svc_deferred_recv(rqstp); | ||
| 798 | } | ||
| 799 | |||
| 800 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
| 801 | svc_delete_socket(svsk); | ||
| 802 | return 0; | ||
| 803 | } | ||
| 804 | |||
| 805 | clear_bit(SK_DATA, &svsk->sk_flags); | ||
| 806 | skb = NULL; | 454 | skb = NULL; |
| 807 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, | 455 | err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, |
| 808 | 0, 0, MSG_PEEK | MSG_DONTWAIT); | 456 | 0, 0, MSG_PEEK | MSG_DONTWAIT); |
| @@ -813,24 +461,27 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
| 813 | if (err != -EAGAIN) { | 461 | if (err != -EAGAIN) { |
| 814 | /* possibly an icmp error */ | 462 | /* possibly an icmp error */ |
| 815 | dprintk("svc: recvfrom returned error %d\n", -err); | 463 | dprintk("svc: recvfrom returned error %d\n", -err); |
| 816 | set_bit(SK_DATA, &svsk->sk_flags); | 464 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
| 817 | } | 465 | } |
| 818 | svc_sock_received(svsk); | 466 | svc_xprt_received(&svsk->sk_xprt); |
| 819 | return -EAGAIN; | 467 | return -EAGAIN; |
| 820 | } | 468 | } |
| 821 | rqstp->rq_addrlen = sizeof(rqstp->rq_addr); | 469 | len = svc_addr_len(svc_addr(rqstp)); |
| 470 | if (len < 0) | ||
| 471 | return len; | ||
| 472 | rqstp->rq_addrlen = len; | ||
| 822 | if (skb->tstamp.tv64 == 0) { | 473 | if (skb->tstamp.tv64 == 0) { |
| 823 | skb->tstamp = ktime_get_real(); | 474 | skb->tstamp = ktime_get_real(); |
| 824 | /* Don't enable netstamp, sunrpc doesn't | 475 | /* Don't enable netstamp, sunrpc doesn't |
| 825 | need that much accuracy */ | 476 | need that much accuracy */ |
| 826 | } | 477 | } |
| 827 | svsk->sk_sk->sk_stamp = skb->tstamp; | 478 | svsk->sk_sk->sk_stamp = skb->tstamp; |
| 828 | set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ | 479 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ |
| 829 | 480 | ||
| 830 | /* | 481 | /* |
| 831 | * Maybe more packets - kick another thread ASAP. | 482 | * Maybe more packets - kick another thread ASAP. |
| 832 | */ | 483 | */ |
| 833 | svc_sock_received(svsk); | 484 | svc_xprt_received(&svsk->sk_xprt); |
| 834 | 485 | ||
| 835 | len = skb->len - sizeof(struct udphdr); | 486 | len = skb->len - sizeof(struct udphdr); |
| 836 | rqstp->rq_arg.len = len; | 487 | rqstp->rq_arg.len = len; |
| @@ -861,13 +512,14 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
| 861 | skb_free_datagram(svsk->sk_sk, skb); | 512 | skb_free_datagram(svsk->sk_sk, skb); |
| 862 | } else { | 513 | } else { |
| 863 | /* we can use it in-place */ | 514 | /* we can use it in-place */ |
| 864 | rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); | 515 | rqstp->rq_arg.head[0].iov_base = skb->data + |
| 516 | sizeof(struct udphdr); | ||
| 865 | rqstp->rq_arg.head[0].iov_len = len; | 517 | rqstp->rq_arg.head[0].iov_len = len; |
| 866 | if (skb_checksum_complete(skb)) { | 518 | if (skb_checksum_complete(skb)) { |
| 867 | skb_free_datagram(svsk->sk_sk, skb); | 519 | skb_free_datagram(svsk->sk_sk, skb); |
| 868 | return 0; | 520 | return 0; |
| 869 | } | 521 | } |
| 870 | rqstp->rq_skbuff = skb; | 522 | rqstp->rq_xprt_ctxt = skb; |
| 871 | } | 523 | } |
| 872 | 524 | ||
| 873 | rqstp->rq_arg.page_base = 0; | 525 | rqstp->rq_arg.page_base = 0; |
| @@ -900,27 +552,81 @@ svc_udp_sendto(struct svc_rqst *rqstp) | |||
| 900 | return error; | 552 | return error; |
| 901 | } | 553 | } |
| 902 | 554 | ||
| 903 | static void | 555 | static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp) |
| 904 | svc_udp_init(struct svc_sock *svsk) | 556 | { |
| 557 | } | ||
| 558 | |||
| 559 | static int svc_udp_has_wspace(struct svc_xprt *xprt) | ||
| 560 | { | ||
| 561 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
| 562 | struct svc_serv *serv = xprt->xpt_server; | ||
| 563 | unsigned long required; | ||
| 564 | |||
| 565 | /* | ||
| 566 | * Set the SOCK_NOSPACE flag before checking the available | ||
| 567 | * sock space. | ||
| 568 | */ | ||
| 569 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
| 570 | required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; | ||
| 571 | if (required*2 > sock_wspace(svsk->sk_sk)) | ||
| 572 | return 0; | ||
| 573 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
| 574 | return 1; | ||
| 575 | } | ||
| 576 | |||
| 577 | static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) | ||
| 578 | { | ||
| 579 | BUG(); | ||
| 580 | return NULL; | ||
| 581 | } | ||
| 582 | |||
| 583 | static struct svc_xprt *svc_udp_create(struct svc_serv *serv, | ||
| 584 | struct sockaddr *sa, int salen, | ||
| 585 | int flags) | ||
| 586 | { | ||
| 587 | return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); | ||
| 588 | } | ||
| 589 | |||
| 590 | static struct svc_xprt_ops svc_udp_ops = { | ||
| 591 | .xpo_create = svc_udp_create, | ||
| 592 | .xpo_recvfrom = svc_udp_recvfrom, | ||
| 593 | .xpo_sendto = svc_udp_sendto, | ||
| 594 | .xpo_release_rqst = svc_release_skb, | ||
| 595 | .xpo_detach = svc_sock_detach, | ||
| 596 | .xpo_free = svc_sock_free, | ||
| 597 | .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, | ||
| 598 | .xpo_has_wspace = svc_udp_has_wspace, | ||
| 599 | .xpo_accept = svc_udp_accept, | ||
| 600 | }; | ||
| 601 | |||
| 602 | static struct svc_xprt_class svc_udp_class = { | ||
| 603 | .xcl_name = "udp", | ||
| 604 | .xcl_owner = THIS_MODULE, | ||
| 605 | .xcl_ops = &svc_udp_ops, | ||
| 606 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, | ||
| 607 | }; | ||
| 608 | |||
| 609 | static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) | ||
| 905 | { | 610 | { |
| 906 | int one = 1; | 611 | int one = 1; |
| 907 | mm_segment_t oldfs; | 612 | mm_segment_t oldfs; |
| 908 | 613 | ||
| 614 | svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); | ||
| 615 | clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); | ||
| 909 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; | 616 | svsk->sk_sk->sk_data_ready = svc_udp_data_ready; |
| 910 | svsk->sk_sk->sk_write_space = svc_write_space; | 617 | svsk->sk_sk->sk_write_space = svc_write_space; |
| 911 | svsk->sk_recvfrom = svc_udp_recvfrom; | ||
| 912 | svsk->sk_sendto = svc_udp_sendto; | ||
| 913 | 618 | ||
| 914 | /* initialise setting must have enough space to | 619 | /* initialise setting must have enough space to |
| 915 | * receive and respond to one request. | 620 | * receive and respond to one request. |
| 916 | * svc_udp_recvfrom will re-adjust if necessary | 621 | * svc_udp_recvfrom will re-adjust if necessary |
| 917 | */ | 622 | */ |
| 918 | svc_sock_setbufsize(svsk->sk_sock, | 623 | svc_sock_setbufsize(svsk->sk_sock, |
| 919 | 3 * svsk->sk_server->sv_max_mesg, | 624 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, |
| 920 | 3 * svsk->sk_server->sv_max_mesg); | 625 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); |
| 921 | 626 | ||
| 922 | set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ | 627 | /* data might have come in before data_ready set up */ |
| 923 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 628 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
| 629 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); | ||
| 924 | 630 | ||
| 925 | oldfs = get_fs(); | 631 | oldfs = get_fs(); |
| 926 | set_fs(KERNEL_DS); | 632 | set_fs(KERNEL_DS); |
| @@ -934,8 +640,7 @@ svc_udp_init(struct svc_sock *svsk) | |||
| 934 | * A data_ready event on a listening socket means there's a connection | 640 | * A data_ready event on a listening socket means there's a connection |
| 935 | * pending. Do not use state_change as a substitute for it. | 641 | * pending. Do not use state_change as a substitute for it. |
| 936 | */ | 642 | */ |
| 937 | static void | 643 | static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) |
| 938 | svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | ||
| 939 | { | 644 | { |
| 940 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 645 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
| 941 | 646 | ||
| @@ -954,8 +659,8 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | |||
| 954 | */ | 659 | */ |
| 955 | if (sk->sk_state == TCP_LISTEN) { | 660 | if (sk->sk_state == TCP_LISTEN) { |
| 956 | if (svsk) { | 661 | if (svsk) { |
| 957 | set_bit(SK_CONN, &svsk->sk_flags); | 662 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
| 958 | svc_sock_enqueue(svsk); | 663 | svc_xprt_enqueue(&svsk->sk_xprt); |
| 959 | } else | 664 | } else |
| 960 | printk("svc: socket %p: no user data\n", sk); | 665 | printk("svc: socket %p: no user data\n", sk); |
| 961 | } | 666 | } |
| @@ -967,8 +672,7 @@ svc_tcp_listen_data_ready(struct sock *sk, int count_unused) | |||
| 967 | /* | 672 | /* |
| 968 | * A state change on a connected socket means it's dying or dead. | 673 | * A state change on a connected socket means it's dying or dead. |
| 969 | */ | 674 | */ |
| 970 | static void | 675 | static void svc_tcp_state_change(struct sock *sk) |
| 971 | svc_tcp_state_change(struct sock *sk) | ||
| 972 | { | 676 | { |
| 973 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 677 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
| 974 | 678 | ||
| @@ -978,51 +682,36 @@ svc_tcp_state_change(struct sock *sk) | |||
| 978 | if (!svsk) | 682 | if (!svsk) |
| 979 | printk("svc: socket %p: no user data\n", sk); | 683 | printk("svc: socket %p: no user data\n", sk); |
| 980 | else { | 684 | else { |
| 981 | set_bit(SK_CLOSE, &svsk->sk_flags); | 685 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
| 982 | svc_sock_enqueue(svsk); | 686 | svc_xprt_enqueue(&svsk->sk_xprt); |
| 983 | } | 687 | } |
| 984 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 688 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
| 985 | wake_up_interruptible_all(sk->sk_sleep); | 689 | wake_up_interruptible_all(sk->sk_sleep); |
| 986 | } | 690 | } |
| 987 | 691 | ||
| 988 | static void | 692 | static void svc_tcp_data_ready(struct sock *sk, int count) |
| 989 | svc_tcp_data_ready(struct sock *sk, int count) | ||
| 990 | { | 693 | { |
| 991 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; | 694 | struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; |
| 992 | 695 | ||
| 993 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", | 696 | dprintk("svc: socket %p TCP data ready (svsk %p)\n", |
| 994 | sk, sk->sk_user_data); | 697 | sk, sk->sk_user_data); |
| 995 | if (svsk) { | 698 | if (svsk) { |
| 996 | set_bit(SK_DATA, &svsk->sk_flags); | 699 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
| 997 | svc_sock_enqueue(svsk); | 700 | svc_xprt_enqueue(&svsk->sk_xprt); |
| 998 | } | 701 | } |
| 999 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) | 702 | if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) |
| 1000 | wake_up_interruptible(sk->sk_sleep); | 703 | wake_up_interruptible(sk->sk_sleep); |
| 1001 | } | 704 | } |
| 1002 | 705 | ||
| 1003 | static inline int svc_port_is_privileged(struct sockaddr *sin) | ||
| 1004 | { | ||
| 1005 | switch (sin->sa_family) { | ||
| 1006 | case AF_INET: | ||
| 1007 | return ntohs(((struct sockaddr_in *)sin)->sin_port) | ||
| 1008 | < PROT_SOCK; | ||
| 1009 | case AF_INET6: | ||
| 1010 | return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) | ||
| 1011 | < PROT_SOCK; | ||
| 1012 | default: | ||
| 1013 | return 0; | ||
| 1014 | } | ||
| 1015 | } | ||
| 1016 | |||
| 1017 | /* | 706 | /* |
| 1018 | * Accept a TCP connection | 707 | * Accept a TCP connection |
| 1019 | */ | 708 | */ |
| 1020 | static void | 709 | static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) |
| 1021 | svc_tcp_accept(struct svc_sock *svsk) | ||
| 1022 | { | 710 | { |
| 711 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
| 1023 | struct sockaddr_storage addr; | 712 | struct sockaddr_storage addr; |
| 1024 | struct sockaddr *sin = (struct sockaddr *) &addr; | 713 | struct sockaddr *sin = (struct sockaddr *) &addr; |
| 1025 | struct svc_serv *serv = svsk->sk_server; | 714 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; |
| 1026 | struct socket *sock = svsk->sk_sock; | 715 | struct socket *sock = svsk->sk_sock; |
| 1027 | struct socket *newsock; | 716 | struct socket *newsock; |
| 1028 | struct svc_sock *newsvsk; | 717 | struct svc_sock *newsvsk; |
| @@ -1031,9 +720,9 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
| 1031 | 720 | ||
| 1032 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); | 721 | dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); |
| 1033 | if (!sock) | 722 | if (!sock) |
| 1034 | return; | 723 | return NULL; |
| 1035 | 724 | ||
| 1036 | clear_bit(SK_CONN, &svsk->sk_flags); | 725 | clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
| 1037 | err = kernel_accept(sock, &newsock, O_NONBLOCK); | 726 | err = kernel_accept(sock, &newsock, O_NONBLOCK); |
| 1038 | if (err < 0) { | 727 | if (err < 0) { |
| 1039 | if (err == -ENOMEM) | 728 | if (err == -ENOMEM) |
| @@ -1042,11 +731,9 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
| 1042 | else if (err != -EAGAIN && net_ratelimit()) | 731 | else if (err != -EAGAIN && net_ratelimit()) |
| 1043 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", | 732 | printk(KERN_WARNING "%s: accept failed (err %d)!\n", |
| 1044 | serv->sv_name, -err); | 733 | serv->sv_name, -err); |
| 1045 | return; | 734 | return NULL; |
| 1046 | } | 735 | } |
| 1047 | 736 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); | |
| 1048 | set_bit(SK_CONN, &svsk->sk_flags); | ||
| 1049 | svc_sock_enqueue(svsk); | ||
| 1050 | 737 | ||
| 1051 | err = kernel_getpeername(newsock, sin, &slen); | 738 | err = kernel_getpeername(newsock, sin, &slen); |
| 1052 | if (err < 0) { | 739 | if (err < 0) { |
| @@ -1077,106 +764,42 @@ svc_tcp_accept(struct svc_sock *svsk) | |||
| 1077 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, | 764 | if (!(newsvsk = svc_setup_socket(serv, newsock, &err, |
| 1078 | (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) | 765 | (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) |
| 1079 | goto failed; | 766 | goto failed; |
| 1080 | memcpy(&newsvsk->sk_remote, sin, slen); | 767 | svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); |
| 1081 | newsvsk->sk_remotelen = slen; | ||
| 1082 | err = kernel_getsockname(newsock, sin, &slen); | 768 | err = kernel_getsockname(newsock, sin, &slen); |
| 1083 | if (unlikely(err < 0)) { | 769 | if (unlikely(err < 0)) { |
| 1084 | dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); | 770 | dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); |
| 1085 | slen = offsetof(struct sockaddr, sa_data); | 771 | slen = offsetof(struct sockaddr, sa_data); |
| 1086 | } | 772 | } |
| 1087 | memcpy(&newsvsk->sk_local, sin, slen); | 773 | svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); |
| 1088 | |||
| 1089 | svc_sock_received(newsvsk); | ||
| 1090 | |||
| 1091 | /* make sure that we don't have too many active connections. | ||
| 1092 | * If we have, something must be dropped. | ||
| 1093 | * | ||
| 1094 | * There's no point in trying to do random drop here for | ||
| 1095 | * DoS prevention. The NFS clients does 1 reconnect in 15 | ||
| 1096 | * seconds. An attacker can easily beat that. | ||
| 1097 | * | ||
| 1098 | * The only somewhat efficient mechanism would be if drop | ||
| 1099 | * old connections from the same IP first. But right now | ||
| 1100 | * we don't even record the client IP in svc_sock. | ||
| 1101 | */ | ||
| 1102 | if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) { | ||
| 1103 | struct svc_sock *svsk = NULL; | ||
| 1104 | spin_lock_bh(&serv->sv_lock); | ||
| 1105 | if (!list_empty(&serv->sv_tempsocks)) { | ||
| 1106 | if (net_ratelimit()) { | ||
| 1107 | /* Try to help the admin */ | ||
| 1108 | printk(KERN_NOTICE "%s: too many open TCP " | ||
| 1109 | "sockets, consider increasing the " | ||
| 1110 | "number of nfsd threads\n", | ||
| 1111 | serv->sv_name); | ||
| 1112 | printk(KERN_NOTICE | ||
| 1113 | "%s: last TCP connect from %s\n", | ||
| 1114 | serv->sv_name, __svc_print_addr(sin, | ||
| 1115 | buf, sizeof(buf))); | ||
| 1116 | } | ||
| 1117 | /* | ||
| 1118 | * Always select the oldest socket. It's not fair, | ||
| 1119 | * but so is life | ||
| 1120 | */ | ||
| 1121 | svsk = list_entry(serv->sv_tempsocks.prev, | ||
| 1122 | struct svc_sock, | ||
| 1123 | sk_list); | ||
| 1124 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
| 1125 | atomic_inc(&svsk->sk_inuse); | ||
| 1126 | } | ||
| 1127 | spin_unlock_bh(&serv->sv_lock); | ||
| 1128 | |||
| 1129 | if (svsk) { | ||
| 1130 | svc_sock_enqueue(svsk); | ||
| 1131 | svc_sock_put(svsk); | ||
| 1132 | } | ||
| 1133 | |||
| 1134 | } | ||
| 1135 | 774 | ||
| 1136 | if (serv->sv_stats) | 775 | if (serv->sv_stats) |
| 1137 | serv->sv_stats->nettcpconn++; | 776 | serv->sv_stats->nettcpconn++; |
| 1138 | 777 | ||
| 1139 | return; | 778 | return &newsvsk->sk_xprt; |
| 1140 | 779 | ||
| 1141 | failed: | 780 | failed: |
| 1142 | sock_release(newsock); | 781 | sock_release(newsock); |
| 1143 | return; | 782 | return NULL; |
| 1144 | } | 783 | } |
| 1145 | 784 | ||
| 1146 | /* | 785 | /* |
| 1147 | * Receive data from a TCP socket. | 786 | * Receive data from a TCP socket. |
| 1148 | */ | 787 | */ |
| 1149 | static int | 788 | static int svc_tcp_recvfrom(struct svc_rqst *rqstp) |
| 1150 | svc_tcp_recvfrom(struct svc_rqst *rqstp) | ||
| 1151 | { | 789 | { |
| 1152 | struct svc_sock *svsk = rqstp->rq_sock; | 790 | struct svc_sock *svsk = |
| 1153 | struct svc_serv *serv = svsk->sk_server; | 791 | container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); |
| 792 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
| 1154 | int len; | 793 | int len; |
| 1155 | struct kvec *vec; | 794 | struct kvec *vec; |
| 1156 | int pnum, vlen; | 795 | int pnum, vlen; |
| 1157 | 796 | ||
| 1158 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", | 797 | dprintk("svc: tcp_recv %p data %d conn %d close %d\n", |
| 1159 | svsk, test_bit(SK_DATA, &svsk->sk_flags), | 798 | svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), |
| 1160 | test_bit(SK_CONN, &svsk->sk_flags), | 799 | test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), |
| 1161 | test_bit(SK_CLOSE, &svsk->sk_flags)); | 800 | test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); |
| 1162 | 801 | ||
| 1163 | if ((rqstp->rq_deferred = svc_deferred_dequeue(svsk))) { | 802 | if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) |
| 1164 | svc_sock_received(svsk); | ||
| 1165 | return svc_deferred_recv(rqstp); | ||
| 1166 | } | ||
| 1167 | |||
| 1168 | if (test_bit(SK_CLOSE, &svsk->sk_flags)) { | ||
| 1169 | svc_delete_socket(svsk); | ||
| 1170 | return 0; | ||
| 1171 | } | ||
| 1172 | |||
| 1173 | if (svsk->sk_sk->sk_state == TCP_LISTEN) { | ||
| 1174 | svc_tcp_accept(svsk); | ||
| 1175 | svc_sock_received(svsk); | ||
| 1176 | return 0; | ||
| 1177 | } | ||
| 1178 | |||
| 1179 | if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) | ||
| 1180 | /* sndbuf needs to have room for one request | 803 | /* sndbuf needs to have room for one request |
| 1181 | * per thread, otherwise we can stall even when the | 804 | * per thread, otherwise we can stall even when the |
| 1182 | * network isn't a bottleneck. | 805 | * network isn't a bottleneck. |
| @@ -1193,7 +816,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
| 1193 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, | 816 | (serv->sv_nrthreads+3) * serv->sv_max_mesg, |
| 1194 | 3 * serv->sv_max_mesg); | 817 | 3 * serv->sv_max_mesg); |
| 1195 | 818 | ||
| 1196 | clear_bit(SK_DATA, &svsk->sk_flags); | 819 | clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
| 1197 | 820 | ||
| 1198 | /* Receive data. If we haven't got the record length yet, get | 821 | /* Receive data. If we haven't got the record length yet, get |
| 1199 | * the next four bytes. Otherwise try to gobble up as much as | 822 | * the next four bytes. Otherwise try to gobble up as much as |
| @@ -1212,7 +835,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
| 1212 | if (len < want) { | 835 | if (len < want) { |
| 1213 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", | 836 | dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", |
| 1214 | len, want); | 837 | len, want); |
| 1215 | svc_sock_received(svsk); | 838 | svc_xprt_received(&svsk->sk_xprt); |
| 1216 | return -EAGAIN; /* record header not complete */ | 839 | return -EAGAIN; /* record header not complete */ |
| 1217 | } | 840 | } |
| 1218 | 841 | ||
| @@ -1248,11 +871,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
| 1248 | if (len < svsk->sk_reclen) { | 871 | if (len < svsk->sk_reclen) { |
| 1249 | dprintk("svc: incomplete TCP record (%d of %d)\n", | 872 | dprintk("svc: incomplete TCP record (%d of %d)\n", |
| 1250 | len, svsk->sk_reclen); | 873 | len, svsk->sk_reclen); |
| 1251 | svc_sock_received(svsk); | 874 | svc_xprt_received(&svsk->sk_xprt); |
| 1252 | return -EAGAIN; /* record not complete */ | 875 | return -EAGAIN; /* record not complete */ |
| 1253 | } | 876 | } |
| 1254 | len = svsk->sk_reclen; | 877 | len = svsk->sk_reclen; |
| 1255 | set_bit(SK_DATA, &svsk->sk_flags); | 878 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
| 1256 | 879 | ||
| 1257 | vec = rqstp->rq_vec; | 880 | vec = rqstp->rq_vec; |
| 1258 | vec[0] = rqstp->rq_arg.head[0]; | 881 | vec[0] = rqstp->rq_arg.head[0]; |
| @@ -1281,30 +904,31 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
| 1281 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; | 904 | rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; |
| 1282 | } | 905 | } |
| 1283 | 906 | ||
| 1284 | rqstp->rq_skbuff = NULL; | 907 | rqstp->rq_xprt_ctxt = NULL; |
| 1285 | rqstp->rq_prot = IPPROTO_TCP; | 908 | rqstp->rq_prot = IPPROTO_TCP; |
| 1286 | 909 | ||
| 1287 | /* Reset TCP read info */ | 910 | /* Reset TCP read info */ |
| 1288 | svsk->sk_reclen = 0; | 911 | svsk->sk_reclen = 0; |
| 1289 | svsk->sk_tcplen = 0; | 912 | svsk->sk_tcplen = 0; |
| 1290 | 913 | ||
| 1291 | svc_sock_received(svsk); | 914 | svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); |
| 915 | svc_xprt_received(&svsk->sk_xprt); | ||
| 1292 | if (serv->sv_stats) | 916 | if (serv->sv_stats) |
| 1293 | serv->sv_stats->nettcpcnt++; | 917 | serv->sv_stats->nettcpcnt++; |
| 1294 | 918 | ||
| 1295 | return len; | 919 | return len; |
| 1296 | 920 | ||
| 1297 | err_delete: | 921 | err_delete: |
| 1298 | svc_delete_socket(svsk); | 922 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
| 1299 | return -EAGAIN; | 923 | return -EAGAIN; |
| 1300 | 924 | ||
| 1301 | error: | 925 | error: |
| 1302 | if (len == -EAGAIN) { | 926 | if (len == -EAGAIN) { |
| 1303 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); | 927 | dprintk("RPC: TCP recvfrom got EAGAIN\n"); |
| 1304 | svc_sock_received(svsk); | 928 | svc_xprt_received(&svsk->sk_xprt); |
| 1305 | } else { | 929 | } else { |
| 1306 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", | 930 | printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", |
| 1307 | svsk->sk_server->sv_name, -len); | 931 | svsk->sk_xprt.xpt_server->sv_name, -len); |
| 1308 | goto err_delete; | 932 | goto err_delete; |
| 1309 | } | 933 | } |
| 1310 | 934 | ||
| @@ -1314,8 +938,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) | |||
| 1314 | /* | 938 | /* |
| 1315 | * Send out data on TCP socket. | 939 | * Send out data on TCP socket. |
| 1316 | */ | 940 | */ |
| 1317 | static int | 941 | static int svc_tcp_sendto(struct svc_rqst *rqstp) |
| 1318 | svc_tcp_sendto(struct svc_rqst *rqstp) | ||
| 1319 | { | 942 | { |
| 1320 | struct xdr_buf *xbufp = &rqstp->rq_res; | 943 | struct xdr_buf *xbufp = &rqstp->rq_res; |
| 1321 | int sent; | 944 | int sent; |
| @@ -1328,35 +951,109 @@ svc_tcp_sendto(struct svc_rqst *rqstp) | |||
| 1328 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); | 951 | reclen = htonl(0x80000000|((xbufp->len ) - 4)); |
| 1329 | memcpy(xbufp->head[0].iov_base, &reclen, 4); | 952 | memcpy(xbufp->head[0].iov_base, &reclen, 4); |
| 1330 | 953 | ||
| 1331 | if (test_bit(SK_DEAD, &rqstp->rq_sock->sk_flags)) | 954 | if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags)) |
| 1332 | return -ENOTCONN; | 955 | return -ENOTCONN; |
| 1333 | 956 | ||
| 1334 | sent = svc_sendto(rqstp, &rqstp->rq_res); | 957 | sent = svc_sendto(rqstp, &rqstp->rq_res); |
| 1335 | if (sent != xbufp->len) { | 958 | if (sent != xbufp->len) { |
| 1336 | printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", | 959 | printk(KERN_NOTICE |
| 1337 | rqstp->rq_sock->sk_server->sv_name, | 960 | "rpc-srv/tcp: %s: %s %d when sending %d bytes " |
| 961 | "- shutting down socket\n", | ||
| 962 | rqstp->rq_xprt->xpt_server->sv_name, | ||
| 1338 | (sent<0)?"got error":"sent only", | 963 | (sent<0)?"got error":"sent only", |
| 1339 | sent, xbufp->len); | 964 | sent, xbufp->len); |
| 1340 | set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); | 965 | set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags); |
| 1341 | svc_sock_enqueue(rqstp->rq_sock); | 966 | svc_xprt_enqueue(rqstp->rq_xprt); |
| 1342 | sent = -EAGAIN; | 967 | sent = -EAGAIN; |
| 1343 | } | 968 | } |
| 1344 | return sent; | 969 | return sent; |
| 1345 | } | 970 | } |
| 1346 | 971 | ||
| 1347 | static void | 972 | /* |
| 1348 | svc_tcp_init(struct svc_sock *svsk) | 973 | * Setup response header. TCP has a 4B record length field. |
| 974 | */ | ||
| 975 | static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) | ||
| 976 | { | ||
| 977 | struct kvec *resv = &rqstp->rq_res.head[0]; | ||
| 978 | |||
| 979 | /* tcp needs a space for the record length... */ | ||
| 980 | svc_putnl(resv, 0); | ||
| 981 | } | ||
| 982 | |||
| 983 | static int svc_tcp_has_wspace(struct svc_xprt *xprt) | ||
| 984 | { | ||
| 985 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); | ||
| 986 | struct svc_serv *serv = svsk->sk_xprt.xpt_server; | ||
| 987 | int required; | ||
| 988 | int wspace; | ||
| 989 | |||
| 990 | /* | ||
| 991 | * Set the SOCK_NOSPACE flag before checking the available | ||
| 992 | * sock space. | ||
| 993 | */ | ||
| 994 | set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
| 995 | required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; | ||
| 996 | wspace = sk_stream_wspace(svsk->sk_sk); | ||
| 997 | |||
| 998 | if (wspace < sk_stream_min_wspace(svsk->sk_sk)) | ||
| 999 | return 0; | ||
| 1000 | if (required * 2 > wspace) | ||
| 1001 | return 0; | ||
| 1002 | |||
| 1003 | clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); | ||
| 1004 | return 1; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, | ||
| 1008 | struct sockaddr *sa, int salen, | ||
| 1009 | int flags) | ||
| 1010 | { | ||
| 1011 | return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | static struct svc_xprt_ops svc_tcp_ops = { | ||
| 1015 | .xpo_create = svc_tcp_create, | ||
| 1016 | .xpo_recvfrom = svc_tcp_recvfrom, | ||
| 1017 | .xpo_sendto = svc_tcp_sendto, | ||
| 1018 | .xpo_release_rqst = svc_release_skb, | ||
| 1019 | .xpo_detach = svc_sock_detach, | ||
| 1020 | .xpo_free = svc_sock_free, | ||
| 1021 | .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, | ||
| 1022 | .xpo_has_wspace = svc_tcp_has_wspace, | ||
| 1023 | .xpo_accept = svc_tcp_accept, | ||
| 1024 | }; | ||
| 1025 | |||
| 1026 | static struct svc_xprt_class svc_tcp_class = { | ||
| 1027 | .xcl_name = "tcp", | ||
| 1028 | .xcl_owner = THIS_MODULE, | ||
| 1029 | .xcl_ops = &svc_tcp_ops, | ||
| 1030 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, | ||
| 1031 | }; | ||
| 1032 | |||
| 1033 | void svc_init_xprt_sock(void) | ||
| 1034 | { | ||
| 1035 | svc_reg_xprt_class(&svc_tcp_class); | ||
| 1036 | svc_reg_xprt_class(&svc_udp_class); | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | void svc_cleanup_xprt_sock(void) | ||
| 1040 | { | ||
| 1041 | svc_unreg_xprt_class(&svc_tcp_class); | ||
| 1042 | svc_unreg_xprt_class(&svc_udp_class); | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) | ||
| 1349 | { | 1046 | { |
| 1350 | struct sock *sk = svsk->sk_sk; | 1047 | struct sock *sk = svsk->sk_sk; |
| 1351 | struct tcp_sock *tp = tcp_sk(sk); | 1048 | struct tcp_sock *tp = tcp_sk(sk); |
| 1352 | 1049 | ||
| 1353 | svsk->sk_recvfrom = svc_tcp_recvfrom; | 1050 | svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); |
| 1354 | svsk->sk_sendto = svc_tcp_sendto; | 1051 | set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); |
| 1355 | |||
| 1356 | if (sk->sk_state == TCP_LISTEN) { | 1052 | if (sk->sk_state == TCP_LISTEN) { |
| 1357 | dprintk("setting up TCP socket for listening\n"); | 1053 | dprintk("setting up TCP socket for listening\n"); |
| 1054 | set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); | ||
| 1358 | sk->sk_data_ready = svc_tcp_listen_data_ready; | 1055 | sk->sk_data_ready = svc_tcp_listen_data_ready; |
| 1359 | set_bit(SK_CONN, &svsk->sk_flags); | 1056 | set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); |
| 1360 | } else { | 1057 | } else { |
| 1361 | dprintk("setting up TCP socket for reading\n"); | 1058 | dprintk("setting up TCP socket for reading\n"); |
| 1362 | sk->sk_state_change = svc_tcp_state_change; | 1059 | sk->sk_state_change = svc_tcp_state_change; |
| @@ -1373,18 +1070,17 @@ svc_tcp_init(struct svc_sock *svsk) | |||
| 1373 | * svc_tcp_recvfrom will re-adjust if necessary | 1070 | * svc_tcp_recvfrom will re-adjust if necessary |
| 1374 | */ | 1071 | */ |
| 1375 | svc_sock_setbufsize(svsk->sk_sock, | 1072 | svc_sock_setbufsize(svsk->sk_sock, |
| 1376 | 3 * svsk->sk_server->sv_max_mesg, | 1073 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, |
| 1377 | 3 * svsk->sk_server->sv_max_mesg); | 1074 | 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); |
| 1378 | 1075 | ||
| 1379 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1076 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
| 1380 | set_bit(SK_DATA, &svsk->sk_flags); | 1077 | set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); |
| 1381 | if (sk->sk_state != TCP_ESTABLISHED) | 1078 | if (sk->sk_state != TCP_ESTABLISHED) |
| 1382 | set_bit(SK_CLOSE, &svsk->sk_flags); | 1079 | set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); |
| 1383 | } | 1080 | } |
| 1384 | } | 1081 | } |
| 1385 | 1082 | ||
| 1386 | void | 1083 | void svc_sock_update_bufs(struct svc_serv *serv) |
| 1387 | svc_sock_update_bufs(struct svc_serv *serv) | ||
| 1388 | { | 1084 | { |
| 1389 | /* | 1085 | /* |
| 1390 | * The number of server threads has changed. Update | 1086 | * The number of server threads has changed. Update |
| @@ -1395,232 +1091,18 @@ svc_sock_update_bufs(struct svc_serv *serv) | |||
| 1395 | spin_lock_bh(&serv->sv_lock); | 1091 | spin_lock_bh(&serv->sv_lock); |
| 1396 | list_for_each(le, &serv->sv_permsocks) { | 1092 | list_for_each(le, &serv->sv_permsocks) { |
| 1397 | struct svc_sock *svsk = | 1093 | struct svc_sock *svsk = |
| 1398 | list_entry(le, struct svc_sock, sk_list); | 1094 | list_entry(le, struct svc_sock, sk_xprt.xpt_list); |
| 1399 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1095 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
| 1400 | } | 1096 | } |
| 1401 | list_for_each(le, &serv->sv_tempsocks) { | 1097 | list_for_each(le, &serv->sv_tempsocks) { |
| 1402 | struct svc_sock *svsk = | 1098 | struct svc_sock *svsk = |
| 1403 | list_entry(le, struct svc_sock, sk_list); | 1099 | list_entry(le, struct svc_sock, sk_xprt.xpt_list); |
| 1404 | set_bit(SK_CHNGBUF, &svsk->sk_flags); | 1100 | set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); |
| 1405 | } | 1101 | } |
| 1406 | spin_unlock_bh(&serv->sv_lock); | 1102 | spin_unlock_bh(&serv->sv_lock); |
| 1407 | } | 1103 | } |
| 1408 | 1104 | ||
| 1409 | /* | 1105 | /* |
| 1410 | * Receive the next request on any socket. This code is carefully | ||
| 1411 | * organised not to touch any cachelines in the shared svc_serv | ||
| 1412 | * structure, only cachelines in the local svc_pool. | ||
| 1413 | */ | ||
| 1414 | int | ||
| 1415 | svc_recv(struct svc_rqst *rqstp, long timeout) | ||
| 1416 | { | ||
| 1417 | struct svc_sock *svsk = NULL; | ||
| 1418 | struct svc_serv *serv = rqstp->rq_server; | ||
| 1419 | struct svc_pool *pool = rqstp->rq_pool; | ||
| 1420 | int len, i; | ||
| 1421 | int pages; | ||
| 1422 | struct xdr_buf *arg; | ||
| 1423 | DECLARE_WAITQUEUE(wait, current); | ||
| 1424 | |||
| 1425 | dprintk("svc: server %p waiting for data (to = %ld)\n", | ||
| 1426 | rqstp, timeout); | ||
| 1427 | |||
| 1428 | if (rqstp->rq_sock) | ||
| 1429 | printk(KERN_ERR | ||
| 1430 | "svc_recv: service %p, socket not NULL!\n", | ||
| 1431 | rqstp); | ||
| 1432 | if (waitqueue_active(&rqstp->rq_wait)) | ||
| 1433 | printk(KERN_ERR | ||
| 1434 | "svc_recv: service %p, wait queue active!\n", | ||
| 1435 | rqstp); | ||
| 1436 | |||
| 1437 | |||
| 1438 | /* now allocate needed pages. If we get a failure, sleep briefly */ | ||
| 1439 | pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE; | ||
| 1440 | for (i=0; i < pages ; i++) | ||
| 1441 | while (rqstp->rq_pages[i] == NULL) { | ||
| 1442 | struct page *p = alloc_page(GFP_KERNEL); | ||
| 1443 | if (!p) | ||
| 1444 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | ||
| 1445 | rqstp->rq_pages[i] = p; | ||
| 1446 | } | ||
| 1447 | rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */ | ||
| 1448 | BUG_ON(pages >= RPCSVC_MAXPAGES); | ||
| 1449 | |||
| 1450 | /* Make arg->head point to first page and arg->pages point to rest */ | ||
| 1451 | arg = &rqstp->rq_arg; | ||
| 1452 | arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); | ||
| 1453 | arg->head[0].iov_len = PAGE_SIZE; | ||
| 1454 | arg->pages = rqstp->rq_pages + 1; | ||
| 1455 | arg->page_base = 0; | ||
| 1456 | /* save at least one page for response */ | ||
| 1457 | arg->page_len = (pages-2)*PAGE_SIZE; | ||
| 1458 | arg->len = (pages-1)*PAGE_SIZE; | ||
| 1459 | arg->tail[0].iov_len = 0; | ||
| 1460 | |||
| 1461 | try_to_freeze(); | ||
| 1462 | cond_resched(); | ||
| 1463 | if (signalled()) | ||
| 1464 | return -EINTR; | ||
| 1465 | |||
| 1466 | spin_lock_bh(&pool->sp_lock); | ||
| 1467 | if ((svsk = svc_sock_dequeue(pool)) != NULL) { | ||
| 1468 | rqstp->rq_sock = svsk; | ||
| 1469 | atomic_inc(&svsk->sk_inuse); | ||
| 1470 | rqstp->rq_reserved = serv->sv_max_mesg; | ||
| 1471 | atomic_add(rqstp->rq_reserved, &svsk->sk_reserved); | ||
| 1472 | } else { | ||
| 1473 | /* No data pending. Go to sleep */ | ||
| 1474 | svc_thread_enqueue(pool, rqstp); | ||
| 1475 | |||
| 1476 | /* | ||
| 1477 | * We have to be able to interrupt this wait | ||
| 1478 | * to bring down the daemons ... | ||
| 1479 | */ | ||
| 1480 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 1481 | add_wait_queue(&rqstp->rq_wait, &wait); | ||
| 1482 | spin_unlock_bh(&pool->sp_lock); | ||
| 1483 | |||
| 1484 | schedule_timeout(timeout); | ||
| 1485 | |||
| 1486 | try_to_freeze(); | ||
| 1487 | |||
| 1488 | spin_lock_bh(&pool->sp_lock); | ||
| 1489 | remove_wait_queue(&rqstp->rq_wait, &wait); | ||
| 1490 | |||
| 1491 | if (!(svsk = rqstp->rq_sock)) { | ||
| 1492 | svc_thread_dequeue(pool, rqstp); | ||
| 1493 | spin_unlock_bh(&pool->sp_lock); | ||
| 1494 | dprintk("svc: server %p, no data yet\n", rqstp); | ||
| 1495 | return signalled()? -EINTR : -EAGAIN; | ||
| 1496 | } | ||
| 1497 | } | ||
| 1498 | spin_unlock_bh(&pool->sp_lock); | ||
| 1499 | |||
| 1500 | dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n", | ||
| 1501 | rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse)); | ||
| 1502 | len = svsk->sk_recvfrom(rqstp); | ||
| 1503 | dprintk("svc: got len=%d\n", len); | ||
| 1504 | |||
| 1505 | /* No data, incomplete (TCP) read, or accept() */ | ||
| 1506 | if (len == 0 || len == -EAGAIN) { | ||
| 1507 | rqstp->rq_res.len = 0; | ||
| 1508 | svc_sock_release(rqstp); | ||
| 1509 | return -EAGAIN; | ||
| 1510 | } | ||
| 1511 | svsk->sk_lastrecv = get_seconds(); | ||
| 1512 | clear_bit(SK_OLD, &svsk->sk_flags); | ||
| 1513 | |||
| 1514 | rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); | ||
| 1515 | rqstp->rq_chandle.defer = svc_defer; | ||
| 1516 | |||
| 1517 | if (serv->sv_stats) | ||
| 1518 | serv->sv_stats->netcnt++; | ||
| 1519 | return len; | ||
| 1520 | } | ||
| 1521 | |||
| 1522 | /* | ||
| 1523 | * Drop request | ||
| 1524 | */ | ||
| 1525 | void | ||
| 1526 | svc_drop(struct svc_rqst *rqstp) | ||
| 1527 | { | ||
| 1528 | dprintk("svc: socket %p dropped request\n", rqstp->rq_sock); | ||
| 1529 | svc_sock_release(rqstp); | ||
| 1530 | } | ||
| 1531 | |||
| 1532 | /* | ||
| 1533 | * Return reply to client. | ||
| 1534 | */ | ||
| 1535 | int | ||
| 1536 | svc_send(struct svc_rqst *rqstp) | ||
| 1537 | { | ||
| 1538 | struct svc_sock *svsk; | ||
| 1539 | int len; | ||
| 1540 | struct xdr_buf *xb; | ||
| 1541 | |||
| 1542 | if ((svsk = rqstp->rq_sock) == NULL) { | ||
| 1543 | printk(KERN_WARNING "NULL socket pointer in %s:%d\n", | ||
| 1544 | __FILE__, __LINE__); | ||
| 1545 | return -EFAULT; | ||
| 1546 | } | ||
| 1547 | |||
| 1548 | /* release the receive skb before sending the reply */ | ||
| 1549 | svc_release_skb(rqstp); | ||
| 1550 | |||
| 1551 | /* calculate over-all length */ | ||
| 1552 | xb = & rqstp->rq_res; | ||
| 1553 | xb->len = xb->head[0].iov_len + | ||
| 1554 | xb->page_len + | ||
| 1555 | xb->tail[0].iov_len; | ||
| 1556 | |||
| 1557 | /* Grab svsk->sk_mutex to serialize outgoing data. */ | ||
| 1558 | mutex_lock(&svsk->sk_mutex); | ||
| 1559 | if (test_bit(SK_DEAD, &svsk->sk_flags)) | ||
| 1560 | len = -ENOTCONN; | ||
| 1561 | else | ||
| 1562 | len = svsk->sk_sendto(rqstp); | ||
| 1563 | mutex_unlock(&svsk->sk_mutex); | ||
| 1564 | svc_sock_release(rqstp); | ||
| 1565 | |||
| 1566 | if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN) | ||
| 1567 | return 0; | ||
| 1568 | return len; | ||
| 1569 | } | ||
| 1570 | |||
| 1571 | /* | ||
| 1572 | * Timer function to close old temporary sockets, using | ||
| 1573 | * a mark-and-sweep algorithm. | ||
| 1574 | */ | ||
| 1575 | static void | ||
| 1576 | svc_age_temp_sockets(unsigned long closure) | ||
| 1577 | { | ||
| 1578 | struct svc_serv *serv = (struct svc_serv *)closure; | ||
| 1579 | struct svc_sock *svsk; | ||
| 1580 | struct list_head *le, *next; | ||
| 1581 | LIST_HEAD(to_be_aged); | ||
| 1582 | |||
| 1583 | dprintk("svc_age_temp_sockets\n"); | ||
| 1584 | |||
| 1585 | if (!spin_trylock_bh(&serv->sv_lock)) { | ||
| 1586 | /* busy, try again 1 sec later */ | ||
| 1587 | dprintk("svc_age_temp_sockets: busy\n"); | ||
| 1588 | mod_timer(&serv->sv_temptimer, jiffies + HZ); | ||
| 1589 | return; | ||
| 1590 | } | ||
| 1591 | |||
| 1592 | list_for_each_safe(le, next, &serv->sv_tempsocks) { | ||
| 1593 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
| 1594 | |||
| 1595 | if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) | ||
| 1596 | continue; | ||
| 1597 | if (atomic_read(&svsk->sk_inuse) > 1 || test_bit(SK_BUSY, &svsk->sk_flags)) | ||
| 1598 | continue; | ||
| 1599 | atomic_inc(&svsk->sk_inuse); | ||
| 1600 | list_move(le, &to_be_aged); | ||
| 1601 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
| 1602 | set_bit(SK_DETACHED, &svsk->sk_flags); | ||
| 1603 | } | ||
| 1604 | spin_unlock_bh(&serv->sv_lock); | ||
| 1605 | |||
| 1606 | while (!list_empty(&to_be_aged)) { | ||
| 1607 | le = to_be_aged.next; | ||
| 1608 | /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */ | ||
| 1609 | list_del_init(le); | ||
| 1610 | svsk = list_entry(le, struct svc_sock, sk_list); | ||
| 1611 | |||
| 1612 | dprintk("queuing svsk %p for closing, %lu seconds old\n", | ||
| 1613 | svsk, get_seconds() - svsk->sk_lastrecv); | ||
| 1614 | |||
| 1615 | /* a thread will dequeue and close it soon */ | ||
| 1616 | svc_sock_enqueue(svsk); | ||
| 1617 | svc_sock_put(svsk); | ||
| 1618 | } | ||
| 1619 | |||
| 1620 | mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); | ||
| 1621 | } | ||
| 1622 | |||
| 1623 | /* | ||
| 1624 | * Initialize socket for RPC use and create svc_sock struct | 1106 | * Initialize socket for RPC use and create svc_sock struct |
| 1625 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. | 1107 | * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. |
| 1626 | */ | 1108 | */ |
| @@ -1631,7 +1113,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
| 1631 | struct svc_sock *svsk; | 1113 | struct svc_sock *svsk; |
| 1632 | struct sock *inet; | 1114 | struct sock *inet; |
| 1633 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); | 1115 | int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); |
| 1634 | int is_temporary = flags & SVC_SOCK_TEMPORARY; | ||
| 1635 | 1116 | ||
| 1636 | dprintk("svc: svc_setup_socket %p\n", sock); | 1117 | dprintk("svc: svc_setup_socket %p\n", sock); |
| 1637 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { | 1118 | if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { |
| @@ -1651,44 +1132,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, | |||
| 1651 | return NULL; | 1132 | return NULL; |
| 1652 | } | 1133 | } |
| 1653 | 1134 | ||
| 1654 | set_bit(SK_BUSY, &svsk->sk_flags); | ||
| 1655 | inet->sk_user_data = svsk; | 1135 | inet->sk_user_data = svsk; |
| 1656 | svsk->sk_sock = sock; | 1136 | svsk->sk_sock = sock; |
| 1657 | svsk->sk_sk = inet; | 1137 | svsk->sk_sk = inet; |
| 1658 | svsk->sk_ostate = inet->sk_state_change; | 1138 | svsk->sk_ostate = inet->sk_state_change; |
| 1659 | svsk->sk_odata = inet->sk_data_ready; | 1139 | svsk->sk_odata = inet->sk_data_ready; |
| 1660 | svsk->sk_owspace = inet->sk_write_space; | 1140 | svsk->sk_owspace = inet->sk_write_space; |
| 1661 | svsk->sk_server = serv; | ||
| 1662 | atomic_set(&svsk->sk_inuse, 1); | ||
| 1663 | svsk->sk_lastrecv = get_seconds(); | ||
| 1664 | spin_lock_init(&svsk->sk_lock); | ||
| 1665 | INIT_LIST_HEAD(&svsk->sk_deferred); | ||
| 1666 | INIT_LIST_HEAD(&svsk->sk_ready); | ||
| 1667 | mutex_init(&svsk->sk_mutex); | ||
| 1668 | 1141 | ||
| 1669 | /* Initialize the socket */ | 1142 | /* Initialize the socket */ |
| 1670 | if (sock->type == SOCK_DGRAM) | 1143 | if (sock->type == SOCK_DGRAM) |
| 1671 | svc_udp_init(svsk); | 1144 | svc_udp_init(svsk, serv); |
| 1672 | else | 1145 | else |
| 1673 | svc_tcp_init(svsk); | 1146 | svc_tcp_init(svsk, serv); |
| 1674 | |||
| 1675 | spin_lock_bh(&serv->sv_lock); | ||
| 1676 | if (is_temporary) { | ||
| 1677 | set_bit(SK_TEMP, &svsk->sk_flags); | ||
| 1678 | list_add(&svsk->sk_list, &serv->sv_tempsocks); | ||
| 1679 | serv->sv_tmpcnt++; | ||
| 1680 | if (serv->sv_temptimer.function == NULL) { | ||
| 1681 | /* setup timer to age temp sockets */ | ||
| 1682 | setup_timer(&serv->sv_temptimer, svc_age_temp_sockets, | ||
| 1683 | (unsigned long)serv); | ||
| 1684 | mod_timer(&serv->sv_temptimer, | ||
| 1685 | jiffies + svc_conn_age_period * HZ); | ||
| 1686 | } | ||
| 1687 | } else { | ||
| 1688 | clear_bit(SK_TEMP, &svsk->sk_flags); | ||
| 1689 | list_add(&svsk->sk_list, &serv->sv_permsocks); | ||
| 1690 | } | ||
| 1691 | spin_unlock_bh(&serv->sv_lock); | ||
| 1692 | 1147 | ||
| 1693 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", | 1148 | dprintk("svc: svc_setup_socket created %p (inet %p)\n", |
| 1694 | svsk, svsk->sk_sk); | 1149 | svsk, svsk->sk_sk); |
| @@ -1717,7 +1172,16 @@ int svc_addsock(struct svc_serv *serv, | |||
| 1717 | else { | 1172 | else { |
| 1718 | svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); | 1173 | svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); |
| 1719 | if (svsk) { | 1174 | if (svsk) { |
| 1720 | svc_sock_received(svsk); | 1175 | struct sockaddr_storage addr; |
| 1176 | struct sockaddr *sin = (struct sockaddr *)&addr; | ||
| 1177 | int salen; | ||
| 1178 | if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) | ||
| 1179 | svc_xprt_set_local(&svsk->sk_xprt, sin, salen); | ||
| 1180 | clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags); | ||
| 1181 | spin_lock_bh(&serv->sv_lock); | ||
| 1182 | list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks); | ||
| 1183 | spin_unlock_bh(&serv->sv_lock); | ||
| 1184 | svc_xprt_received(&svsk->sk_xprt); | ||
| 1721 | err = 0; | 1185 | err = 0; |
| 1722 | } | 1186 | } |
| 1723 | } | 1187 | } |
| @@ -1733,14 +1197,19 @@ EXPORT_SYMBOL_GPL(svc_addsock); | |||
| 1733 | /* | 1197 | /* |
| 1734 | * Create socket for RPC service. | 1198 | * Create socket for RPC service. |
| 1735 | */ | 1199 | */ |
| 1736 | static int svc_create_socket(struct svc_serv *serv, int protocol, | 1200 | static struct svc_xprt *svc_create_socket(struct svc_serv *serv, |
| 1737 | struct sockaddr *sin, int len, int flags) | 1201 | int protocol, |
| 1202 | struct sockaddr *sin, int len, | ||
| 1203 | int flags) | ||
| 1738 | { | 1204 | { |
| 1739 | struct svc_sock *svsk; | 1205 | struct svc_sock *svsk; |
| 1740 | struct socket *sock; | 1206 | struct socket *sock; |
| 1741 | int error; | 1207 | int error; |
| 1742 | int type; | 1208 | int type; |
| 1743 | char buf[RPC_MAX_ADDRBUFLEN]; | 1209 | char buf[RPC_MAX_ADDRBUFLEN]; |
| 1210 | struct sockaddr_storage addr; | ||
| 1211 | struct sockaddr *newsin = (struct sockaddr *)&addr; | ||
| 1212 | int newlen; | ||
| 1744 | 1213 | ||
| 1745 | dprintk("svc: svc_create_socket(%s, %d, %s)\n", | 1214 | dprintk("svc: svc_create_socket(%s, %d, %s)\n", |
| 1746 | serv->sv_program->pg_name, protocol, | 1215 | serv->sv_program->pg_name, protocol, |
| @@ -1749,13 +1218,13 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, | |||
| 1749 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { | 1218 | if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { |
| 1750 | printk(KERN_WARNING "svc: only UDP and TCP " | 1219 | printk(KERN_WARNING "svc: only UDP and TCP " |
| 1751 | "sockets supported\n"); | 1220 | "sockets supported\n"); |
| 1752 | return -EINVAL; | 1221 | return ERR_PTR(-EINVAL); |
| 1753 | } | 1222 | } |
| 1754 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | 1223 | type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; |
| 1755 | 1224 | ||
| 1756 | error = sock_create_kern(sin->sa_family, type, protocol, &sock); | 1225 | error = sock_create_kern(sin->sa_family, type, protocol, &sock); |
| 1757 | if (error < 0) | 1226 | if (error < 0) |
| 1758 | return error; | 1227 | return ERR_PTR(error); |
| 1759 | 1228 | ||
| 1760 | svc_reclassify_socket(sock); | 1229 | svc_reclassify_socket(sock); |
| 1761 | 1230 | ||
| @@ -1765,203 +1234,55 @@ static int svc_create_socket(struct svc_serv *serv, int protocol, | |||
| 1765 | if (error < 0) | 1234 | if (error < 0) |
| 1766 | goto bummer; | 1235 | goto bummer; |
| 1767 | 1236 | ||
| 1237 | newlen = len; | ||
| 1238 | error = kernel_getsockname(sock, newsin, &newlen); | ||
| 1239 | if (error < 0) | ||
| 1240 | goto bummer; | ||
| 1241 | |||
| 1768 | if (protocol == IPPROTO_TCP) { | 1242 | if (protocol == IPPROTO_TCP) { |
| 1769 | if ((error = kernel_listen(sock, 64)) < 0) | 1243 | if ((error = kernel_listen(sock, 64)) < 0) |
| 1770 | goto bummer; | 1244 | goto bummer; |
| 1771 | } | 1245 | } |
| 1772 | 1246 | ||
| 1773 | if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { | 1247 | if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { |
| 1774 | svc_sock_received(svsk); | 1248 | svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); |
| 1775 | return ntohs(inet_sk(svsk->sk_sk)->sport); | 1249 | return (struct svc_xprt *)svsk; |
| 1776 | } | 1250 | } |
| 1777 | 1251 | ||
| 1778 | bummer: | 1252 | bummer: |
| 1779 | dprintk("svc: svc_create_socket error = %d\n", -error); | 1253 | dprintk("svc: svc_create_socket error = %d\n", -error); |
| 1780 | sock_release(sock); | 1254 | sock_release(sock); |
| 1781 | return error; | 1255 | return ERR_PTR(error); |
| 1782 | } | 1256 | } |
| 1783 | 1257 | ||
| 1784 | /* | 1258 | /* |
| 1785 | * Remove a dead socket | 1259 | * Detach the svc_sock from the socket so that no |
| 1260 | * more callbacks occur. | ||
| 1786 | */ | 1261 | */ |
| 1787 | static void | 1262 | static void svc_sock_detach(struct svc_xprt *xprt) |
| 1788 | svc_delete_socket(struct svc_sock *svsk) | ||
| 1789 | { | 1263 | { |
| 1790 | struct svc_serv *serv; | 1264 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); |
| 1791 | struct sock *sk; | 1265 | struct sock *sk = svsk->sk_sk; |
| 1792 | |||
| 1793 | dprintk("svc: svc_delete_socket(%p)\n", svsk); | ||
| 1794 | 1266 | ||
| 1795 | serv = svsk->sk_server; | 1267 | dprintk("svc: svc_sock_detach(%p)\n", svsk); |
| 1796 | sk = svsk->sk_sk; | ||
| 1797 | 1268 | ||
| 1269 | /* put back the old socket callbacks */ | ||
| 1798 | sk->sk_state_change = svsk->sk_ostate; | 1270 | sk->sk_state_change = svsk->sk_ostate; |
| 1799 | sk->sk_data_ready = svsk->sk_odata; | 1271 | sk->sk_data_ready = svsk->sk_odata; |
| 1800 | sk->sk_write_space = svsk->sk_owspace; | 1272 | sk->sk_write_space = svsk->sk_owspace; |
| 1801 | |||
| 1802 | spin_lock_bh(&serv->sv_lock); | ||
| 1803 | |||
| 1804 | if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) | ||
| 1805 | list_del_init(&svsk->sk_list); | ||
| 1806 | /* | ||
| 1807 | * We used to delete the svc_sock from whichever list | ||
| 1808 | * it's sk_ready node was on, but we don't actually | ||
| 1809 | * need to. This is because the only time we're called | ||
| 1810 | * while still attached to a queue, the queue itself | ||
| 1811 | * is about to be destroyed (in svc_destroy). | ||
| 1812 | */ | ||
| 1813 | if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { | ||
| 1814 | BUG_ON(atomic_read(&svsk->sk_inuse)<2); | ||
| 1815 | atomic_dec(&svsk->sk_inuse); | ||
| 1816 | if (test_bit(SK_TEMP, &svsk->sk_flags)) | ||
| 1817 | serv->sv_tmpcnt--; | ||
| 1818 | } | ||
| 1819 | |||
| 1820 | spin_unlock_bh(&serv->sv_lock); | ||
| 1821 | } | ||
| 1822 | |||
| 1823 | static void svc_close_socket(struct svc_sock *svsk) | ||
| 1824 | { | ||
| 1825 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
| 1826 | if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) | ||
| 1827 | /* someone else will have to effect the close */ | ||
| 1828 | return; | ||
| 1829 | |||
| 1830 | atomic_inc(&svsk->sk_inuse); | ||
| 1831 | svc_delete_socket(svsk); | ||
| 1832 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
| 1833 | svc_sock_put(svsk); | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | void svc_force_close_socket(struct svc_sock *svsk) | ||
| 1837 | { | ||
| 1838 | set_bit(SK_CLOSE, &svsk->sk_flags); | ||
| 1839 | if (test_bit(SK_BUSY, &svsk->sk_flags)) { | ||
| 1840 | /* Waiting to be processed, but no threads left, | ||
| 1841 | * So just remove it from the waiting list | ||
| 1842 | */ | ||
| 1843 | list_del_init(&svsk->sk_ready); | ||
| 1844 | clear_bit(SK_BUSY, &svsk->sk_flags); | ||
| 1845 | } | ||
| 1846 | svc_close_socket(svsk); | ||
| 1847 | } | ||
| 1848 | |||
| 1849 | /** | ||
| 1850 | * svc_makesock - Make a socket for nfsd and lockd | ||
| 1851 | * @serv: RPC server structure | ||
| 1852 | * @protocol: transport protocol to use | ||
| 1853 | * @port: port to use | ||
| 1854 | * @flags: requested socket characteristics | ||
| 1855 | * | ||
| 1856 | */ | ||
| 1857 | int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port, | ||
| 1858 | int flags) | ||
| 1859 | { | ||
| 1860 | struct sockaddr_in sin = { | ||
| 1861 | .sin_family = AF_INET, | ||
| 1862 | .sin_addr.s_addr = INADDR_ANY, | ||
| 1863 | .sin_port = htons(port), | ||
| 1864 | }; | ||
| 1865 | |||
| 1866 | dprintk("svc: creating socket proto = %d\n", protocol); | ||
| 1867 | return svc_create_socket(serv, protocol, (struct sockaddr *) &sin, | ||
| 1868 | sizeof(sin), flags); | ||
| 1869 | } | 1273 | } |
| 1870 | 1274 | ||
| 1871 | /* | 1275 | /* |
| 1872 | * Handle defer and revisit of requests | 1276 | * Free the svc_sock's socket resources and the svc_sock itself. |
| 1873 | */ | 1277 | */ |
| 1874 | 1278 | static void svc_sock_free(struct svc_xprt *xprt) | |
| 1875 | static void svc_revisit(struct cache_deferred_req *dreq, int too_many) | ||
| 1876 | { | 1279 | { |
| 1877 | struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle); | 1280 | struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); |
| 1878 | struct svc_sock *svsk; | 1281 | dprintk("svc: svc_sock_free(%p)\n", svsk); |
| 1879 | 1282 | ||
| 1880 | if (too_many) { | 1283 | if (svsk->sk_sock->file) |
| 1881 | svc_sock_put(dr->svsk); | 1284 | sockfd_put(svsk->sk_sock); |
| 1882 | kfree(dr); | 1285 | else |
| 1883 | return; | 1286 | sock_release(svsk->sk_sock); |
| 1884 | } | 1287 | kfree(svsk); |
| 1885 | dprintk("revisit queued\n"); | ||
| 1886 | svsk = dr->svsk; | ||
| 1887 | dr->svsk = NULL; | ||
| 1888 | spin_lock(&svsk->sk_lock); | ||
| 1889 | list_add(&dr->handle.recent, &svsk->sk_deferred); | ||
| 1890 | spin_unlock(&svsk->sk_lock); | ||
| 1891 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
| 1892 | svc_sock_enqueue(svsk); | ||
| 1893 | svc_sock_put(svsk); | ||
| 1894 | } | ||
| 1895 | |||
| 1896 | static struct cache_deferred_req * | ||
| 1897 | svc_defer(struct cache_req *req) | ||
| 1898 | { | ||
| 1899 | struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); | ||
| 1900 | int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.len); | ||
| 1901 | struct svc_deferred_req *dr; | ||
| 1902 | |||
| 1903 | if (rqstp->rq_arg.page_len) | ||
| 1904 | return NULL; /* if more than a page, give up FIXME */ | ||
| 1905 | if (rqstp->rq_deferred) { | ||
| 1906 | dr = rqstp->rq_deferred; | ||
| 1907 | rqstp->rq_deferred = NULL; | ||
| 1908 | } else { | ||
| 1909 | int skip = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; | ||
| 1910 | /* FIXME maybe discard if size too large */ | ||
| 1911 | dr = kmalloc(size, GFP_KERNEL); | ||
| 1912 | if (dr == NULL) | ||
| 1913 | return NULL; | ||
| 1914 | |||
| 1915 | dr->handle.owner = rqstp->rq_server; | ||
| 1916 | dr->prot = rqstp->rq_prot; | ||
| 1917 | memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); | ||
| 1918 | dr->addrlen = rqstp->rq_addrlen; | ||
| 1919 | dr->daddr = rqstp->rq_daddr; | ||
| 1920 | dr->argslen = rqstp->rq_arg.len >> 2; | ||
| 1921 | memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); | ||
| 1922 | } | ||
| 1923 | atomic_inc(&rqstp->rq_sock->sk_inuse); | ||
| 1924 | dr->svsk = rqstp->rq_sock; | ||
| 1925 | |||
| 1926 | dr->handle.revisit = svc_revisit; | ||
| 1927 | return &dr->handle; | ||
| 1928 | } | ||
| 1929 | |||
| 1930 | /* | ||
| 1931 | * recv data from a deferred request into an active one | ||
| 1932 | */ | ||
| 1933 | static int svc_deferred_recv(struct svc_rqst *rqstp) | ||
| 1934 | { | ||
| 1935 | struct svc_deferred_req *dr = rqstp->rq_deferred; | ||
| 1936 | |||
| 1937 | rqstp->rq_arg.head[0].iov_base = dr->args; | ||
| 1938 | rqstp->rq_arg.head[0].iov_len = dr->argslen<<2; | ||
| 1939 | rqstp->rq_arg.page_len = 0; | ||
| 1940 | rqstp->rq_arg.len = dr->argslen<<2; | ||
| 1941 | rqstp->rq_prot = dr->prot; | ||
| 1942 | memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); | ||
| 1943 | rqstp->rq_addrlen = dr->addrlen; | ||
| 1944 | rqstp->rq_daddr = dr->daddr; | ||
| 1945 | rqstp->rq_respages = rqstp->rq_pages; | ||
| 1946 | return dr->argslen<<2; | ||
| 1947 | } | ||
| 1948 | |||
| 1949 | |||
| 1950 | static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) | ||
| 1951 | { | ||
| 1952 | struct svc_deferred_req *dr = NULL; | ||
| 1953 | |||
| 1954 | if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) | ||
| 1955 | return NULL; | ||
| 1956 | spin_lock(&svsk->sk_lock); | ||
| 1957 | clear_bit(SK_DEFERRED, &svsk->sk_flags); | ||
| 1958 | if (!list_empty(&svsk->sk_deferred)) { | ||
| 1959 | dr = list_entry(svsk->sk_deferred.next, | ||
| 1960 | struct svc_deferred_req, | ||
| 1961 | handle.recent); | ||
| 1962 | list_del_init(&dr->handle.recent); | ||
| 1963 | set_bit(SK_DEFERRED, &svsk->sk_flags); | ||
| 1964 | } | ||
| 1965 | spin_unlock(&svsk->sk_lock); | ||
| 1966 | return dr; | ||
| 1967 | } | 1288 | } |
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index bada7de0c2fc..0f8c439b848a 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/sunrpc/types.h> | 18 | #include <linux/sunrpc/types.h> |
| 19 | #include <linux/sunrpc/sched.h> | 19 | #include <linux/sunrpc/sched.h> |
| 20 | #include <linux/sunrpc/stats.h> | 20 | #include <linux/sunrpc/stats.h> |
| 21 | #include <linux/sunrpc/svc_xprt.h> | ||
| 21 | 22 | ||
| 22 | /* | 23 | /* |
| 23 | * Declare the debug flags here | 24 | * Declare the debug flags here |
| @@ -55,6 +56,30 @@ rpc_unregister_sysctl(void) | |||
| 55 | } | 56 | } |
| 56 | } | 57 | } |
| 57 | 58 | ||
| 59 | static int proc_do_xprt(ctl_table *table, int write, struct file *file, | ||
| 60 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 61 | { | ||
| 62 | char tmpbuf[256]; | ||
| 63 | int len; | ||
| 64 | if ((*ppos && !write) || !*lenp) { | ||
| 65 | *lenp = 0; | ||
| 66 | return 0; | ||
| 67 | } | ||
| 68 | if (write) | ||
| 69 | return -EINVAL; | ||
| 70 | else { | ||
| 71 | len = svc_print_xprts(tmpbuf, sizeof(tmpbuf)); | ||
| 72 | if (!access_ok(VERIFY_WRITE, buffer, len)) | ||
| 73 | return -EFAULT; | ||
| 74 | |||
| 75 | if (__copy_to_user(buffer, tmpbuf, len)) | ||
| 76 | return -EFAULT; | ||
| 77 | } | ||
| 78 | *lenp -= len; | ||
| 79 | *ppos += len; | ||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | |||
| 58 | static int | 83 | static int |
| 59 | proc_dodebug(ctl_table *table, int write, struct file *file, | 84 | proc_dodebug(ctl_table *table, int write, struct file *file, |
| 60 | void __user *buffer, size_t *lenp, loff_t *ppos) | 85 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| @@ -147,6 +172,12 @@ static ctl_table debug_table[] = { | |||
| 147 | .mode = 0644, | 172 | .mode = 0644, |
| 148 | .proc_handler = &proc_dodebug | 173 | .proc_handler = &proc_dodebug |
| 149 | }, | 174 | }, |
| 175 | { | ||
| 176 | .procname = "transports", | ||
| 177 | .maxlen = 256, | ||
| 178 | .mode = 0444, | ||
| 179 | .proc_handler = &proc_do_xprt, | ||
| 180 | }, | ||
| 150 | { .ctl_name = 0 } | 181 | { .ctl_name = 0 } |
| 151 | }; | 182 | }; |
| 152 | 183 | ||
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 54264062ea69..995c3fdc16c2 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
| @@ -96,11 +96,13 @@ xdr_encode_string(__be32 *p, const char *string) | |||
| 96 | EXPORT_SYMBOL(xdr_encode_string); | 96 | EXPORT_SYMBOL(xdr_encode_string); |
| 97 | 97 | ||
| 98 | __be32 * | 98 | __be32 * |
| 99 | xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen) | 99 | xdr_decode_string_inplace(__be32 *p, char **sp, |
| 100 | unsigned int *lenp, unsigned int maxlen) | ||
| 100 | { | 101 | { |
| 101 | unsigned int len; | 102 | u32 len; |
| 102 | 103 | ||
| 103 | if ((len = ntohl(*p++)) > maxlen) | 104 | len = ntohl(*p++); |
| 105 | if (len > maxlen) | ||
| 104 | return NULL; | 106 | return NULL; |
| 105 | *lenp = len; | 107 | *lenp = len; |
| 106 | *sp = (char *) p; | 108 | *sp = (char *) p; |
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index 264f0feeb513..5a8f268bdd30 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile | |||
| @@ -1,3 +1,8 @@ | |||
| 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o | 1 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o |
| 2 | 2 | ||
| 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o | 3 | xprtrdma-y := transport.o rpc_rdma.o verbs.o |
| 4 | |||
| 5 | obj-$(CONFIG_SUNRPC_XPRT_RDMA) += svcrdma.o | ||
| 6 | |||
| 7 | svcrdma-y := svc_rdma.o svc_rdma_transport.o \ | ||
| 8 | svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c new file mode 100644 index 000000000000..88c0ca20bb1e --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma.c | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is available to you under a choice of one of two | ||
| 5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
| 6 | * General Public License (GPL) Version 2, available from the file | ||
| 7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
| 8 | * license below: | ||
| 9 | * | ||
| 10 | * Redistribution and use in source and binary forms, with or without | ||
| 11 | * modification, are permitted provided that the following conditions | ||
| 12 | * are met: | ||
| 13 | * | ||
| 14 | * Redistributions of source code must retain the above copyright | ||
| 15 | * notice, this list of conditions and the following disclaimer. | ||
| 16 | * | ||
| 17 | * Redistributions in binary form must reproduce the above | ||
| 18 | * copyright notice, this list of conditions and the following | ||
| 19 | * disclaimer in the documentation and/or other materials provided | ||
| 20 | * with the distribution. | ||
| 21 | * | ||
| 22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
| 23 | * its contributors may be used to endorse or promote products | ||
| 24 | * derived from this software without specific prior written | ||
| 25 | * permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | * | ||
| 39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
| 40 | */ | ||
| 41 | #include <linux/module.h> | ||
| 42 | #include <linux/init.h> | ||
| 43 | #include <linux/fs.h> | ||
| 44 | #include <linux/sysctl.h> | ||
| 45 | #include <linux/sunrpc/clnt.h> | ||
| 46 | #include <linux/sunrpc/sched.h> | ||
| 47 | #include <linux/sunrpc/svc_rdma.h> | ||
| 48 | |||
| 49 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
| 50 | |||
| 51 | /* RPC/RDMA parameters */ | ||
| 52 | unsigned int svcrdma_ord = RPCRDMA_ORD; | ||
| 53 | static unsigned int min_ord = 1; | ||
| 54 | static unsigned int max_ord = 4096; | ||
| 55 | unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS; | ||
| 56 | static unsigned int min_max_requests = 4; | ||
| 57 | static unsigned int max_max_requests = 16384; | ||
| 58 | unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE; | ||
| 59 | static unsigned int min_max_inline = 4096; | ||
| 60 | static unsigned int max_max_inline = 65536; | ||
| 61 | |||
| 62 | atomic_t rdma_stat_recv; | ||
| 63 | atomic_t rdma_stat_read; | ||
| 64 | atomic_t rdma_stat_write; | ||
| 65 | atomic_t rdma_stat_sq_starve; | ||
| 66 | atomic_t rdma_stat_rq_starve; | ||
| 67 | atomic_t rdma_stat_rq_poll; | ||
| 68 | atomic_t rdma_stat_rq_prod; | ||
| 69 | atomic_t rdma_stat_sq_poll; | ||
| 70 | atomic_t rdma_stat_sq_prod; | ||
| 71 | |||
| 72 | /* | ||
| 73 | * This function implements reading and resetting an atomic_t stat | ||
| 74 | * variable through read/write to a proc file. Any write to the file | ||
| 75 | * resets the associated statistic to zero. Any read returns it's | ||
| 76 | * current value. | ||
| 77 | */ | ||
| 78 | static int read_reset_stat(ctl_table *table, int write, | ||
| 79 | struct file *filp, void __user *buffer, size_t *lenp, | ||
| 80 | loff_t *ppos) | ||
| 81 | { | ||
| 82 | atomic_t *stat = (atomic_t *)table->data; | ||
| 83 | |||
| 84 | if (!stat) | ||
| 85 | return -EINVAL; | ||
| 86 | |||
| 87 | if (write) | ||
| 88 | atomic_set(stat, 0); | ||
| 89 | else { | ||
| 90 | char str_buf[32]; | ||
| 91 | char *data; | ||
| 92 | int len = snprintf(str_buf, 32, "%d\n", atomic_read(stat)); | ||
| 93 | if (len >= 32) | ||
| 94 | return -EFAULT; | ||
| 95 | len = strlen(str_buf); | ||
| 96 | if (*ppos > len) { | ||
| 97 | *lenp = 0; | ||
| 98 | return 0; | ||
| 99 | } | ||
| 100 | data = &str_buf[*ppos]; | ||
| 101 | len -= *ppos; | ||
| 102 | if (len > *lenp) | ||
| 103 | len = *lenp; | ||
| 104 | if (len && copy_to_user(buffer, str_buf, len)) | ||
| 105 | return -EFAULT; | ||
| 106 | *lenp = len; | ||
| 107 | *ppos += len; | ||
| 108 | } | ||
| 109 | return 0; | ||
| 110 | } | ||
| 111 | |||
| 112 | static struct ctl_table_header *svcrdma_table_header; | ||
| 113 | static ctl_table svcrdma_parm_table[] = { | ||
| 114 | { | ||
| 115 | .procname = "max_requests", | ||
| 116 | .data = &svcrdma_max_requests, | ||
| 117 | .maxlen = sizeof(unsigned int), | ||
| 118 | .mode = 0644, | ||
| 119 | .proc_handler = &proc_dointvec_minmax, | ||
| 120 | .strategy = &sysctl_intvec, | ||
| 121 | .extra1 = &min_max_requests, | ||
| 122 | .extra2 = &max_max_requests | ||
| 123 | }, | ||
| 124 | { | ||
| 125 | .procname = "max_req_size", | ||
| 126 | .data = &svcrdma_max_req_size, | ||
| 127 | .maxlen = sizeof(unsigned int), | ||
| 128 | .mode = 0644, | ||
| 129 | .proc_handler = &proc_dointvec_minmax, | ||
| 130 | .strategy = &sysctl_intvec, | ||
| 131 | .extra1 = &min_max_inline, | ||
| 132 | .extra2 = &max_max_inline | ||
| 133 | }, | ||
| 134 | { | ||
| 135 | .procname = "max_outbound_read_requests", | ||
| 136 | .data = &svcrdma_ord, | ||
| 137 | .maxlen = sizeof(unsigned int), | ||
| 138 | .mode = 0644, | ||
| 139 | .proc_handler = &proc_dointvec_minmax, | ||
| 140 | .strategy = &sysctl_intvec, | ||
| 141 | .extra1 = &min_ord, | ||
| 142 | .extra2 = &max_ord, | ||
| 143 | }, | ||
| 144 | |||
| 145 | { | ||
| 146 | .procname = "rdma_stat_read", | ||
| 147 | .data = &rdma_stat_read, | ||
| 148 | .maxlen = sizeof(atomic_t), | ||
| 149 | .mode = 0644, | ||
| 150 | .proc_handler = &read_reset_stat, | ||
| 151 | }, | ||
| 152 | { | ||
| 153 | .procname = "rdma_stat_recv", | ||
| 154 | .data = &rdma_stat_recv, | ||
| 155 | .maxlen = sizeof(atomic_t), | ||
| 156 | .mode = 0644, | ||
| 157 | .proc_handler = &read_reset_stat, | ||
| 158 | }, | ||
| 159 | { | ||
| 160 | .procname = "rdma_stat_write", | ||
| 161 | .data = &rdma_stat_write, | ||
| 162 | .maxlen = sizeof(atomic_t), | ||
| 163 | .mode = 0644, | ||
| 164 | .proc_handler = &read_reset_stat, | ||
| 165 | }, | ||
| 166 | { | ||
| 167 | .procname = "rdma_stat_sq_starve", | ||
| 168 | .data = &rdma_stat_sq_starve, | ||
| 169 | .maxlen = sizeof(atomic_t), | ||
| 170 | .mode = 0644, | ||
| 171 | .proc_handler = &read_reset_stat, | ||
| 172 | }, | ||
| 173 | { | ||
| 174 | .procname = "rdma_stat_rq_starve", | ||
| 175 | .data = &rdma_stat_rq_starve, | ||
| 176 | .maxlen = sizeof(atomic_t), | ||
| 177 | .mode = 0644, | ||
| 178 | .proc_handler = &read_reset_stat, | ||
| 179 | }, | ||
| 180 | { | ||
| 181 | .procname = "rdma_stat_rq_poll", | ||
| 182 | .data = &rdma_stat_rq_poll, | ||
| 183 | .maxlen = sizeof(atomic_t), | ||
| 184 | .mode = 0644, | ||
| 185 | .proc_handler = &read_reset_stat, | ||
| 186 | }, | ||
| 187 | { | ||
| 188 | .procname = "rdma_stat_rq_prod", | ||
| 189 | .data = &rdma_stat_rq_prod, | ||
| 190 | .maxlen = sizeof(atomic_t), | ||
| 191 | .mode = 0644, | ||
| 192 | .proc_handler = &read_reset_stat, | ||
| 193 | }, | ||
| 194 | { | ||
| 195 | .procname = "rdma_stat_sq_poll", | ||
| 196 | .data = &rdma_stat_sq_poll, | ||
| 197 | .maxlen = sizeof(atomic_t), | ||
| 198 | .mode = 0644, | ||
| 199 | .proc_handler = &read_reset_stat, | ||
| 200 | }, | ||
| 201 | { | ||
| 202 | .procname = "rdma_stat_sq_prod", | ||
| 203 | .data = &rdma_stat_sq_prod, | ||
| 204 | .maxlen = sizeof(atomic_t), | ||
| 205 | .mode = 0644, | ||
| 206 | .proc_handler = &read_reset_stat, | ||
| 207 | }, | ||
| 208 | { | ||
| 209 | .ctl_name = 0, | ||
| 210 | }, | ||
| 211 | }; | ||
| 212 | |||
| 213 | static ctl_table svcrdma_table[] = { | ||
| 214 | { | ||
| 215 | .procname = "svc_rdma", | ||
| 216 | .mode = 0555, | ||
| 217 | .child = svcrdma_parm_table | ||
| 218 | }, | ||
| 219 | { | ||
| 220 | .ctl_name = 0, | ||
| 221 | }, | ||
| 222 | }; | ||
| 223 | |||
| 224 | static ctl_table svcrdma_root_table[] = { | ||
| 225 | { | ||
| 226 | .ctl_name = CTL_SUNRPC, | ||
| 227 | .procname = "sunrpc", | ||
| 228 | .mode = 0555, | ||
| 229 | .child = svcrdma_table | ||
| 230 | }, | ||
| 231 | { | ||
| 232 | .ctl_name = 0, | ||
| 233 | }, | ||
| 234 | }; | ||
| 235 | |||
| 236 | void svc_rdma_cleanup(void) | ||
| 237 | { | ||
| 238 | dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n"); | ||
| 239 | if (svcrdma_table_header) { | ||
| 240 | unregister_sysctl_table(svcrdma_table_header); | ||
| 241 | svcrdma_table_header = NULL; | ||
| 242 | } | ||
| 243 | svc_unreg_xprt_class(&svc_rdma_class); | ||
| 244 | } | ||
| 245 | |||
| 246 | int svc_rdma_init(void) | ||
| 247 | { | ||
| 248 | dprintk("SVCRDMA Module Init, register RPC RDMA transport\n"); | ||
| 249 | dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord); | ||
| 250 | dprintk("\tmax_requests : %d\n", svcrdma_max_requests); | ||
| 251 | dprintk("\tsq_depth : %d\n", | ||
| 252 | svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT); | ||
| 253 | dprintk("\tmax_inline : %d\n", svcrdma_max_req_size); | ||
| 254 | if (!svcrdma_table_header) | ||
| 255 | svcrdma_table_header = | ||
| 256 | register_sysctl_table(svcrdma_root_table); | ||
| 257 | |||
| 258 | /* Register RDMA with the SVC transport switch */ | ||
| 259 | svc_reg_xprt_class(&svc_rdma_class); | ||
| 260 | return 0; | ||
| 261 | } | ||
| 262 | MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); | ||
| 263 | MODULE_DESCRIPTION("SVC RDMA Transport"); | ||
| 264 | MODULE_LICENSE("Dual BSD/GPL"); | ||
| 265 | module_init(svc_rdma_init); | ||
| 266 | module_exit(svc_rdma_cleanup); | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c new file mode 100644 index 000000000000..9530ef2d40dc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c | |||
| @@ -0,0 +1,412 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is available to you under a choice of one of two | ||
| 5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
| 6 | * General Public License (GPL) Version 2, available from the file | ||
| 7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
| 8 | * license below: | ||
| 9 | * | ||
| 10 | * Redistribution and use in source and binary forms, with or without | ||
| 11 | * modification, are permitted provided that the following conditions | ||
| 12 | * are met: | ||
| 13 | * | ||
| 14 | * Redistributions of source code must retain the above copyright | ||
| 15 | * notice, this list of conditions and the following disclaimer. | ||
| 16 | * | ||
| 17 | * Redistributions in binary form must reproduce the above | ||
| 18 | * copyright notice, this list of conditions and the following | ||
| 19 | * disclaimer in the documentation and/or other materials provided | ||
| 20 | * with the distribution. | ||
| 21 | * | ||
| 22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
| 23 | * its contributors may be used to endorse or promote products | ||
| 24 | * derived from this software without specific prior written | ||
| 25 | * permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | * | ||
| 39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
| 40 | */ | ||
| 41 | |||
| 42 | #include <linux/sunrpc/xdr.h> | ||
| 43 | #include <linux/sunrpc/debug.h> | ||
| 44 | #include <asm/unaligned.h> | ||
| 45 | #include <linux/sunrpc/rpc_rdma.h> | ||
| 46 | #include <linux/sunrpc/svc_rdma.h> | ||
| 47 | |||
| 48 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
| 49 | |||
| 50 | /* | ||
| 51 | * Decodes a read chunk list. The expected format is as follows: | ||
| 52 | * descrim : xdr_one | ||
| 53 | * position : u32 offset into XDR stream | ||
| 54 | * handle : u32 RKEY | ||
| 55 | * . . . | ||
| 56 | * end-of-list: xdr_zero | ||
| 57 | */ | ||
| 58 | static u32 *decode_read_list(u32 *va, u32 *vaend) | ||
| 59 | { | ||
| 60 | struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; | ||
| 61 | |||
| 62 | while (ch->rc_discrim != xdr_zero) { | ||
| 63 | u64 ch_offset; | ||
| 64 | |||
| 65 | if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > | ||
| 66 | (unsigned long)vaend) { | ||
| 67 | dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); | ||
| 68 | return NULL; | ||
| 69 | } | ||
| 70 | |||
| 71 | ch->rc_discrim = ntohl(ch->rc_discrim); | ||
| 72 | ch->rc_position = ntohl(ch->rc_position); | ||
| 73 | ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle); | ||
| 74 | ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length); | ||
| 75 | va = (u32 *)&ch->rc_target.rs_offset; | ||
| 76 | xdr_decode_hyper(va, &ch_offset); | ||
| 77 | put_unaligned(ch_offset, (u64 *)va); | ||
| 78 | ch++; | ||
| 79 | } | ||
| 80 | return (u32 *)&ch->rc_position; | ||
| 81 | } | ||
| 82 | |||
| 83 | /* | ||
| 84 | * Determine number of chunks and total bytes in chunk list. The chunk | ||
| 85 | * list has already been verified to fit within the RPCRDMA header. | ||
| 86 | */ | ||
| 87 | void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, | ||
| 88 | int *ch_count, int *byte_count) | ||
| 89 | { | ||
| 90 | /* compute the number of bytes represented by read chunks */ | ||
| 91 | *byte_count = 0; | ||
| 92 | *ch_count = 0; | ||
| 93 | for (; ch->rc_discrim != 0; ch++) { | ||
| 94 | *byte_count = *byte_count + ch->rc_target.rs_length; | ||
| 95 | *ch_count = *ch_count + 1; | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | /* | ||
| 100 | * Decodes a write chunk list. The expected format is as follows: | ||
| 101 | * descrim : xdr_one | ||
| 102 | * nchunks : <count> | ||
| 103 | * handle : u32 RKEY ---+ | ||
| 104 | * length : u32 <len of segment> | | ||
| 105 | * offset : remove va + <count> | ||
| 106 | * . . . | | ||
| 107 | * ---+ | ||
| 108 | */ | ||
| 109 | static u32 *decode_write_list(u32 *va, u32 *vaend) | ||
| 110 | { | ||
| 111 | int ch_no; | ||
| 112 | struct rpcrdma_write_array *ary = | ||
| 113 | (struct rpcrdma_write_array *)va; | ||
| 114 | |||
| 115 | /* Check for not write-array */ | ||
| 116 | if (ary->wc_discrim == xdr_zero) | ||
| 117 | return (u32 *)&ary->wc_nchunks; | ||
| 118 | |||
| 119 | if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > | ||
| 120 | (unsigned long)vaend) { | ||
| 121 | dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); | ||
| 122 | return NULL; | ||
| 123 | } | ||
| 124 | ary->wc_discrim = ntohl(ary->wc_discrim); | ||
| 125 | ary->wc_nchunks = ntohl(ary->wc_nchunks); | ||
| 126 | if (((unsigned long)&ary->wc_array[0] + | ||
| 127 | (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > | ||
| 128 | (unsigned long)vaend) { | ||
| 129 | dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", | ||
| 130 | ary, ary->wc_nchunks, vaend); | ||
| 131 | return NULL; | ||
| 132 | } | ||
| 133 | for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { | ||
| 134 | u64 ch_offset; | ||
| 135 | |||
| 136 | ary->wc_array[ch_no].wc_target.rs_handle = | ||
| 137 | ntohl(ary->wc_array[ch_no].wc_target.rs_handle); | ||
| 138 | ary->wc_array[ch_no].wc_target.rs_length = | ||
| 139 | ntohl(ary->wc_array[ch_no].wc_target.rs_length); | ||
| 140 | va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; | ||
| 141 | xdr_decode_hyper(va, &ch_offset); | ||
| 142 | put_unaligned(ch_offset, (u64 *)va); | ||
| 143 | } | ||
| 144 | |||
| 145 | /* | ||
| 146 | * rs_length is the 2nd 4B field in wc_target and taking its | ||
| 147 | * address skips the list terminator | ||
| 148 | */ | ||
| 149 | return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length; | ||
| 150 | } | ||
| 151 | |||
| 152 | static u32 *decode_reply_array(u32 *va, u32 *vaend) | ||
| 153 | { | ||
| 154 | int ch_no; | ||
| 155 | struct rpcrdma_write_array *ary = | ||
| 156 | (struct rpcrdma_write_array *)va; | ||
| 157 | |||
| 158 | /* Check for no reply-array */ | ||
| 159 | if (ary->wc_discrim == xdr_zero) | ||
| 160 | return (u32 *)&ary->wc_nchunks; | ||
| 161 | |||
| 162 | if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) > | ||
| 163 | (unsigned long)vaend) { | ||
| 164 | dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); | ||
| 165 | return NULL; | ||
| 166 | } | ||
| 167 | ary->wc_discrim = ntohl(ary->wc_discrim); | ||
| 168 | ary->wc_nchunks = ntohl(ary->wc_nchunks); | ||
| 169 | if (((unsigned long)&ary->wc_array[0] + | ||
| 170 | (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > | ||
| 171 | (unsigned long)vaend) { | ||
| 172 | dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", | ||
| 173 | ary, ary->wc_nchunks, vaend); | ||
| 174 | return NULL; | ||
| 175 | } | ||
| 176 | for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { | ||
| 177 | u64 ch_offset; | ||
| 178 | |||
| 179 | ary->wc_array[ch_no].wc_target.rs_handle = | ||
| 180 | ntohl(ary->wc_array[ch_no].wc_target.rs_handle); | ||
| 181 | ary->wc_array[ch_no].wc_target.rs_length = | ||
| 182 | ntohl(ary->wc_array[ch_no].wc_target.rs_length); | ||
| 183 | va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; | ||
| 184 | xdr_decode_hyper(va, &ch_offset); | ||
| 185 | put_unaligned(ch_offset, (u64 *)va); | ||
| 186 | } | ||
| 187 | |||
| 188 | return (u32 *)&ary->wc_array[ch_no]; | ||
| 189 | } | ||
| 190 | |||
| 191 | int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, | ||
| 192 | struct svc_rqst *rqstp) | ||
| 193 | { | ||
| 194 | struct rpcrdma_msg *rmsgp = NULL; | ||
| 195 | u32 *va; | ||
| 196 | u32 *vaend; | ||
| 197 | u32 hdr_len; | ||
| 198 | |||
| 199 | rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base; | ||
| 200 | |||
| 201 | /* Verify that there's enough bytes for header + something */ | ||
| 202 | if (rqstp->rq_arg.len <= RPCRDMA_HDRLEN_MIN) { | ||
| 203 | dprintk("svcrdma: header too short = %d\n", | ||
| 204 | rqstp->rq_arg.len); | ||
| 205 | return -EINVAL; | ||
| 206 | } | ||
| 207 | |||
| 208 | /* Decode the header */ | ||
| 209 | rmsgp->rm_xid = ntohl(rmsgp->rm_xid); | ||
| 210 | rmsgp->rm_vers = ntohl(rmsgp->rm_vers); | ||
| 211 | rmsgp->rm_credit = ntohl(rmsgp->rm_credit); | ||
| 212 | rmsgp->rm_type = ntohl(rmsgp->rm_type); | ||
| 213 | |||
| 214 | if (rmsgp->rm_vers != RPCRDMA_VERSION) | ||
| 215 | return -ENOSYS; | ||
| 216 | |||
| 217 | /* Pull in the extra for the padded case and bump our pointer */ | ||
| 218 | if (rmsgp->rm_type == RDMA_MSGP) { | ||
| 219 | int hdrlen; | ||
| 220 | rmsgp->rm_body.rm_padded.rm_align = | ||
| 221 | ntohl(rmsgp->rm_body.rm_padded.rm_align); | ||
| 222 | rmsgp->rm_body.rm_padded.rm_thresh = | ||
| 223 | ntohl(rmsgp->rm_body.rm_padded.rm_thresh); | ||
| 224 | |||
| 225 | va = &rmsgp->rm_body.rm_padded.rm_pempty[4]; | ||
| 226 | rqstp->rq_arg.head[0].iov_base = va; | ||
| 227 | hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp); | ||
| 228 | rqstp->rq_arg.head[0].iov_len -= hdrlen; | ||
| 229 | if (hdrlen > rqstp->rq_arg.len) | ||
| 230 | return -EINVAL; | ||
| 231 | return hdrlen; | ||
| 232 | } | ||
| 233 | |||
| 234 | /* The chunk list may contain either a read chunk list or a write | ||
| 235 | * chunk list and a reply chunk list. | ||
| 236 | */ | ||
| 237 | va = &rmsgp->rm_body.rm_chunks[0]; | ||
| 238 | vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len); | ||
| 239 | va = decode_read_list(va, vaend); | ||
| 240 | if (!va) | ||
| 241 | return -EINVAL; | ||
| 242 | va = decode_write_list(va, vaend); | ||
| 243 | if (!va) | ||
| 244 | return -EINVAL; | ||
| 245 | va = decode_reply_array(va, vaend); | ||
| 246 | if (!va) | ||
| 247 | return -EINVAL; | ||
| 248 | |||
| 249 | rqstp->rq_arg.head[0].iov_base = va; | ||
| 250 | hdr_len = (unsigned long)va - (unsigned long)rmsgp; | ||
| 251 | rqstp->rq_arg.head[0].iov_len -= hdr_len; | ||
| 252 | |||
| 253 | *rdma_req = rmsgp; | ||
| 254 | return hdr_len; | ||
| 255 | } | ||
| 256 | |||
/*
 * Re-parse the transport header of a request that was deferred while
 * its RDMA_READ completed.  The header was already byte-swapped and
 * validated by svc_rdma_xdr_decode_req() on first receipt, so this
 * pass only walks the chunk lists to find where the RPC message
 * starts, re-points rq_arg.head[0] there, and returns the header
 * length.  No bounds checks are repeated here.
 */
int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
{
	struct rpcrdma_msg *rmsgp = NULL;
	struct rpcrdma_read_chunk *ch;
	struct rpcrdma_write_array *ary;
	u32 *va;
	u32 hdrlen;

	dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
		rqstp);
	rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;

	/* Pull in the extra for the padded case and bump our pointer.
	 * rm_type is already host byte order (swapped on first pass). */
	if (rmsgp->rm_type == RDMA_MSGP) {
		va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
		rqstp->rq_arg.head[0].iov_base = va;
		hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
		rqstp->rq_arg.head[0].iov_len -= hdrlen;
		return hdrlen;
	}

	/*
	 * Skip all chunks to find RPC msg. These were previously processed
	 */
	va = &rmsgp->rm_body.rm_chunks[0];

	/* Skip read-list: entries run until the xdr_zero terminator */
	for (ch = (struct rpcrdma_read_chunk *)va;
	     ch->rc_discrim != xdr_zero; ch++);
	va = (u32 *)&ch->rc_position;

	/* Skip write-list */
	ary = (struct rpcrdma_write_array *)va;
	if (ary->wc_discrim == xdr_zero)
		va = (u32 *)&ary->wc_nchunks;
	else
		/*
		 * rs_length is the 2nd 4B field in wc_target and taking its
		 * address skips the list terminator
		 */
		va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;

	/* Skip reply-array */
	ary = (struct rpcrdma_write_array *)va;
	if (ary->wc_discrim == xdr_zero)
		va = (u32 *)&ary->wc_nchunks;
	else
		va = (u32 *)&ary->wc_array[ary->wc_nchunks];

	/* va now points at the start of the RPC message proper */
	rqstp->rq_arg.head[0].iov_base = va;
	hdrlen = (unsigned long)va - (unsigned long)rmsgp;
	rqstp->rq_arg.head[0].iov_len -= hdrlen;

	return hdrlen;
}
| 312 | |||
| 313 | int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt, | ||
| 314 | struct rpcrdma_msg *rmsgp, | ||
| 315 | enum rpcrdma_errcode err, u32 *va) | ||
| 316 | { | ||
| 317 | u32 *startp = va; | ||
| 318 | |||
| 319 | *va++ = htonl(rmsgp->rm_xid); | ||
| 320 | *va++ = htonl(rmsgp->rm_vers); | ||
| 321 | *va++ = htonl(xprt->sc_max_requests); | ||
| 322 | *va++ = htonl(RDMA_ERROR); | ||
| 323 | *va++ = htonl(err); | ||
| 324 | if (err == ERR_VERS) { | ||
| 325 | *va++ = htonl(RPCRDMA_VERSION); | ||
| 326 | *va++ = htonl(RPCRDMA_VERSION); | ||
| 327 | } | ||
| 328 | |||
| 329 | return (int)((unsigned long)va - (unsigned long)startp); | ||
| 330 | } | ||
| 331 | |||
/*
 * Compute the length of the transport header of an RPC reply by
 * skipping its (possibly present) write list and reply array.  A
 * reply never carries a read list, so rm_chunks[0] is passed over by
 * starting at rm_chunks[1].  Fields here are still in on-the-wire
 * (network) byte order, hence the ntohl() on wc_nchunks.
 */
int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
{
	struct rpcrdma_write_array *wr_ary;

	/* There is no read-list in a reply */

	/* skip write list */
	wr_ary = (struct rpcrdma_write_array *)
		&rmsgp->rm_body.rm_chunks[1];
	if (wr_ary->wc_discrim)
		/*
		 * rs_length is the 2nd 4B field in wc_target; taking its
		 * address in the one-past-the-end entry skips the
		 * write-list terminator word.
		 */
		wr_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
			wc_target.rs_length;
	else
		/* empty list: only the discriminator word is present */
		wr_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_nchunks;

	/* skip reply array */
	if (wr_ary->wc_discrim)
		wr_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
	else
		wr_ary = (struct rpcrdma_write_array *)
			&wr_ary->wc_nchunks;

	return (unsigned long) wr_ary - (unsigned long) rmsgp;
}
| 359 | |||
| 360 | void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks) | ||
| 361 | { | ||
| 362 | struct rpcrdma_write_array *ary; | ||
| 363 | |||
| 364 | /* no read-list */ | ||
| 365 | rmsgp->rm_body.rm_chunks[0] = xdr_zero; | ||
| 366 | |||
| 367 | /* write-array discrim */ | ||
| 368 | ary = (struct rpcrdma_write_array *) | ||
| 369 | &rmsgp->rm_body.rm_chunks[1]; | ||
| 370 | ary->wc_discrim = xdr_one; | ||
| 371 | ary->wc_nchunks = htonl(chunks); | ||
| 372 | |||
| 373 | /* write-list terminator */ | ||
| 374 | ary->wc_array[chunks].wc_target.rs_handle = xdr_zero; | ||
| 375 | |||
| 376 | /* reply-array discriminator */ | ||
| 377 | ary->wc_array[chunks].wc_target.rs_length = xdr_zero; | ||
| 378 | } | ||
| 379 | |||
/*
 * Mark a reply array as present and record its element count (in
 * network byte order).  The caller encodes the entries themselves via
 * svc_rdma_xdr_encode_array_chunk().
 */
void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
				     int chunks)
{
	ary->wc_discrim = xdr_one;
	ary->wc_nchunks = htonl(chunks);
}
| 386 | |||
| 387 | void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, | ||
| 388 | int chunk_no, | ||
| 389 | u32 rs_handle, u64 rs_offset, | ||
| 390 | u32 write_len) | ||
| 391 | { | ||
| 392 | struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; | ||
| 393 | seg->rs_handle = htonl(rs_handle); | ||
| 394 | seg->rs_length = htonl(write_len); | ||
| 395 | xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset); | ||
| 396 | } | ||
| 397 | |||
| 398 | void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, | ||
| 399 | struct rpcrdma_msg *rdma_argp, | ||
| 400 | struct rpcrdma_msg *rdma_resp, | ||
| 401 | enum rpcrdma_proc rdma_type) | ||
| 402 | { | ||
| 403 | rdma_resp->rm_xid = htonl(rdma_argp->rm_xid); | ||
| 404 | rdma_resp->rm_vers = htonl(rdma_argp->rm_vers); | ||
| 405 | rdma_resp->rm_credit = htonl(xprt->sc_max_requests); | ||
| 406 | rdma_resp->rm_type = htonl(rdma_type); | ||
| 407 | |||
| 408 | /* Encode <nul> chunks lists */ | ||
| 409 | rdma_resp->rm_body.rm_chunks[0] = xdr_zero; | ||
| 410 | rdma_resp->rm_body.rm_chunks[1] = xdr_zero; | ||
| 411 | rdma_resp->rm_body.rm_chunks[2] = xdr_zero; | ||
| 412 | } | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c new file mode 100644 index 000000000000..ab54a736486e --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | |||
| @@ -0,0 +1,586 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is available to you under a choice of one of two | ||
| 5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
| 6 | * General Public License (GPL) Version 2, available from the file | ||
| 7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
| 8 | * license below: | ||
| 9 | * | ||
| 10 | * Redistribution and use in source and binary forms, with or without | ||
| 11 | * modification, are permitted provided that the following conditions | ||
| 12 | * are met: | ||
| 13 | * | ||
| 14 | * Redistributions of source code must retain the above copyright | ||
| 15 | * notice, this list of conditions and the following disclaimer. | ||
| 16 | * | ||
| 17 | * Redistributions in binary form must reproduce the above | ||
| 18 | * copyright notice, this list of conditions and the following | ||
| 19 | * disclaimer in the documentation and/or other materials provided | ||
| 20 | * with the distribution. | ||
| 21 | * | ||
| 22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
| 23 | * its contributors may be used to endorse or promote products | ||
| 24 | * derived from this software without specific prior written | ||
| 25 | * permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | * | ||
| 39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
| 40 | */ | ||
| 41 | |||
| 42 | #include <linux/sunrpc/debug.h> | ||
| 43 | #include <linux/sunrpc/rpc_rdma.h> | ||
| 44 | #include <linux/spinlock.h> | ||
| 45 | #include <asm/unaligned.h> | ||
| 46 | #include <rdma/ib_verbs.h> | ||
| 47 | #include <rdma/rdma_cm.h> | ||
| 48 | #include <linux/sunrpc/svc_rdma.h> | ||
| 49 | |||
| 50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
| 51 | |||
/*
 * Replace the pages in the rq_argpages array with the pages from the SGE in
 * the RDMA_RECV completion. The SGL should contain full pages up until the
 * last one.  Builds rqstp->rq_arg (head, pagelist, tail) to describe
 * byte_count bytes of received data.
 */
static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
			       struct svc_rdma_op_ctxt *ctxt,
			       u32 byte_count)
{
	struct page *page;
	u32 bc;
	int sge_no;

	/* Swap the page in the SGE with the page in argpages */
	page = ctxt->pages[0];
	put_page(rqstp->rq_pages[0]);
	rqstp->rq_pages[0] = page;

	/* Set up the XDR head: at most one SGE's worth of data */
	rqstp->rq_arg.head[0].iov_base = page_address(page);
	rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
	rqstp->rq_arg.len = byte_count;
	rqstp->rq_arg.buflen = byte_count;

	/* Compute bytes past head in the SGL */
	bc = byte_count - rqstp->rq_arg.head[0].iov_len;

	/* If data remains, store it in the pagelist */
	rqstp->rq_arg.page_len = bc;
	rqstp->rq_arg.page_base = 0;
	rqstp->rq_arg.pages = &rqstp->rq_pages[1];
	sge_no = 1;
	while (bc && sge_no < ctxt->count) {
		/* Swap each receive page into rq_pages as above */
		page = ctxt->pages[sge_no];
		put_page(rqstp->rq_pages[sge_no]);
		rqstp->rq_pages[sge_no] = page;
		bc -= min(bc, ctxt->sge[sge_no].length);
		rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
		sge_no++;
	}
	rqstp->rq_respages = &rqstp->rq_pages[sge_no];

	/* We should never run out of SGE because the limit is defined to
	 * support the max allowed RPC data length
	 */
	BUG_ON(bc && (sge_no == ctxt->count));
	BUG_ON((rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len)
	       != byte_count);
	BUG_ON(rqstp->rq_arg.len != byte_count);

	/* If not all pages were used from the SGL, free the remaining ones */
	bc = sge_no;	/* reuse bc to remember the consumed page count */
	while (sge_no < ctxt->count) {
		page = ctxt->pages[sge_no++];
		put_page(page);
	}
	ctxt->count = bc;

	/* Set up tail: nothing past the pagelist */
	rqstp->rq_arg.tail[0].iov_base = NULL;
	rqstp->rq_arg.tail[0].iov_len = 0;
}
| 114 | |||
/*
 * Scratch descriptor mapping one read chunk onto a contiguous run of
 * entries in the temporary ib_sge array built by rdma_rcl_to_sge().
 */
struct chunk_sge {
	int start;	/* sge no for this chunk */
	int count;	/* sge count for this chunk */
};
| 119 | |||
/* Encode a read-chunk-list as an array of IB SGE
 *
 * Assumptions:
 * - chunk[0]->position points to pages[0] at an offset of 0
 * - pages[] is not physically or virtually contiguous and consists of
 *   PAGE_SIZE elements.
 *
 * Output:
 * - sge array pointing into pages[] array.
 * - chunk_sge array specifying sge index and count for each
 *   chunk in the read list
 *
 * Returns the total number of SGEs produced.  Also accumulates the
 * chunk data description into head->arg / head->pages so the request
 * can be reassembled after the RDMA_READs complete.
 */
static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt,
			   struct svc_rqst *rqstp,
			   struct svc_rdma_op_ctxt *head,
			   struct rpcrdma_msg *rmsgp,
			   struct ib_sge *sge,
			   struct chunk_sge *ch_sge_ary,
			   int ch_count,
			   int byte_count)
{
	int sge_no;
	int sge_bytes;
	int page_off;
	int page_no;
	int ch_bytes;
	int ch_no;
	struct rpcrdma_read_chunk *ch;

	sge_no = 0;
	page_no = 0;
	page_off = 0;
	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	ch_no = 0;
	ch_bytes = ch->rc_target.rs_length;
	/* Preserve the request's head/tail; pagelist entries follow the
	 * header pages already held in head->pages. */
	head->arg.head[0] = rqstp->rq_arg.head[0];
	head->arg.tail[0] = rqstp->rq_arg.tail[0];
	head->arg.pages = &head->pages[head->count];
	head->sge[0].length = head->count; /* save count of hdr pages */
	head->arg.page_base = 0;
	head->arg.page_len = ch_bytes;
	head->arg.len = rqstp->rq_arg.len + ch_bytes;
	head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
	head->count++;
	ch_sge_ary[0].start = 0;
	while (byte_count) {
		/* Each SGE covers at most the rest of this page, and at
		 * most the rest of this chunk. */
		sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
		sge[sge_no].addr =
			ib_dma_map_page(xprt->sc_cm_id->device,
					rqstp->rq_arg.pages[page_no],
					page_off, sge_bytes,
					DMA_FROM_DEVICE);
		sge[sge_no].length = sge_bytes;
		sge[sge_no].lkey = xprt->sc_phys_mr->lkey;
		/*
		 * Don't bump head->count here because the same page
		 * may be used by multiple SGE.
		 */
		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
		rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];

		byte_count -= sge_bytes;
		ch_bytes -= sge_bytes;
		sge_no++;
		/*
		 * If all bytes for this chunk have been mapped to an
		 * SGE, move to the next SGE
		 */
		if (ch_bytes == 0) {
			ch_sge_ary[ch_no].count =
				sge_no - ch_sge_ary[ch_no].start;
			ch_no++;
			ch++;
			ch_sge_ary[ch_no].start = sge_no;
			ch_bytes = ch->rc_target.rs_length;
			/* If bytes remaining account for next chunk */
			if (byte_count) {
				head->arg.page_len += ch_bytes;
				head->arg.len += ch_bytes;
				head->arg.buflen += ch_bytes;
			}
		}
		/*
		 * If this SGE consumed all of the page, move to the
		 * next page
		 */
		if ((sge_bytes + page_off) == PAGE_SIZE) {
			page_no++;
			page_off = 0;
			/*
			 * If there are still bytes left to map, bump
			 * the page count
			 */
			if (byte_count)
				head->count++;
		} else
			page_off += sge_bytes;
	}
	/* Every byte of every chunk must have been consumed */
	BUG_ON(byte_count != 0);
	return sge_no;
}
| 222 | |||
| 223 | static void rdma_set_ctxt_sge(struct svc_rdma_op_ctxt *ctxt, | ||
| 224 | struct ib_sge *sge, | ||
| 225 | u64 *sgl_offset, | ||
| 226 | int count) | ||
| 227 | { | ||
| 228 | int i; | ||
| 229 | |||
| 230 | ctxt->count = count; | ||
| 231 | for (i = 0; i < count; i++) { | ||
| 232 | ctxt->sge[i].addr = sge[i].addr; | ||
| 233 | ctxt->sge[i].length = sge[i].length; | ||
| 234 | *sgl_offset = *sgl_offset + sge[i].length; | ||
| 235 | } | ||
| 236 | } | ||
| 237 | |||
| 238 | static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) | ||
| 239 | { | ||
| 240 | #ifdef RDMA_TRANSPORT_IWARP | ||
| 241 | if ((RDMA_TRANSPORT_IWARP == | ||
| 242 | rdma_node_get_transport(xprt->sc_cm_id-> | ||
| 243 | device->node_type)) | ||
| 244 | && sge_count > 1) | ||
| 245 | return 1; | ||
| 246 | else | ||
| 247 | #endif | ||
| 248 | return min_t(int, sge_count, xprt->sc_max_sge); | ||
| 249 | } | ||
| 250 | |||
/*
 * Use RDMA_READ to read data from the advertised client buffer into the
 * XDR stream starting at rq_arg.head[0].iov_base.
 * Each chunk in the array
 * contains the following fields:
 * discrim      - '1', This isn't used for data placement
 * position     - The xdr stream offset (the same for every chunk)
 * handle       - RMR for client memory region
 * length       - data transfer length
 * offset       - 64 bit tagged offset in remote memory region
 *
 * On our side, we need to read into a pagelist. The first page immediately
 * follows the RPC header.
 *
 * This function returns 1 to indicate success. The data is not yet in
 * the pagelist and therefore the RPC request must be deferred. The
 * I/O completion will enqueue the transport again and
 * svc_rdma_recvfrom will complete the request.
 *
 * NOTE: The ctxt must not be touched after the last WR has been posted
 * because the I/O completion processing may occur on another
 * processor and free / modify the context. Do not touch it!
 */
static int rdma_read_xdr(struct svcxprt_rdma *xprt,
			 struct rpcrdma_msg *rmsgp,
			 struct svc_rqst *rqstp,
			 struct svc_rdma_op_ctxt *hdr_ctxt)
{
	struct ib_send_wr read_wr;
	int err = 0;
	int ch_no;
	struct ib_sge *sge;
	int ch_count;
	int byte_count;
	int sge_count;
	u64 sgl_offset;
	struct rpcrdma_read_chunk *ch;
	struct svc_rdma_op_ctxt *ctxt = NULL;
	struct svc_rdma_op_ctxt *head;
	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
	struct svc_rdma_op_ctxt *tmp_ch_ctxt;
	struct chunk_sge *ch_sge_ary;

	/* If no read list is present, return 0 */
	ch = svc_rdma_get_read_chunk(rmsgp);
	if (!ch)
		return 0;

	/* Allocate temporary contexts to keep SGE.  The sge[] array of a
	 * context is borrowed as scratch space for the chunk_sge array,
	 * hence the size assertion. */
	BUG_ON(sizeof(struct ib_sge) < sizeof(struct chunk_sge));
	tmp_sge_ctxt = svc_rdma_get_context(xprt);
	sge = tmp_sge_ctxt->sge;
	tmp_ch_ctxt = svc_rdma_get_context(xprt);
	ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;

	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
				    sge, ch_sge_ary,
				    ch_count, byte_count);
	head = svc_rdma_get_context(xprt);
	sgl_offset = 0;
	ch_no = 0;

	/* Post one RDMA_READ work request per chunk (or more, when a
	 * chunk needs more SGEs than one WR can carry). */
	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
	     ch->rc_discrim != 0; ch++, ch_no++) {
next_sge:
		/* Chain a fresh context for each WR after the first */
		if (!ctxt)
			ctxt = head;
		else {
			ctxt->next = svc_rdma_get_context(xprt);
			ctxt = ctxt->next;
		}
		ctxt->next = NULL;
		ctxt->direction = DMA_FROM_DEVICE;
		clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
		if ((ch+1)->rc_discrim == 0) {
			/*
			 * Checked in sq_cq_reap to see if we need to
			 * be enqueued.  Links the last read context to
			 * hdr_ctxt and back to head, forming a
			 * circular list that completion processing
			 * walks.
			 */
			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
			ctxt->next = hdr_ctxt;
			hdr_ctxt->next = head;
		}

		/* Prepare READ WR */
		memset(&read_wr, 0, sizeof read_wr);
		ctxt->wr_op = IB_WR_RDMA_READ;
		read_wr.wr_id = (unsigned long)ctxt;
		read_wr.opcode = IB_WR_RDMA_READ;
		read_wr.send_flags = IB_SEND_SIGNALED;
		read_wr.wr.rdma.rkey = ch->rc_target.rs_handle;
		read_wr.wr.rdma.remote_addr =
			get_unaligned(&(ch->rc_target.rs_offset)) +
			sgl_offset;
		read_wr.sg_list = &sge[ch_sge_ary[ch_no].start];
		read_wr.num_sge =
			rdma_read_max_sge(xprt, ch_sge_ary[ch_no].count);
		rdma_set_ctxt_sge(ctxt, &sge[ch_sge_ary[ch_no].start],
				  &sgl_offset,
				  read_wr.num_sge);

		/* Post the read */
		err = svc_rdma_send(xprt, &read_wr);
		if (err) {
			printk(KERN_ERR "svcrdma: Error posting send = %d\n",
			       err);
			/*
			 * Break the circular list so free knows when
			 * to stop if the error happened to occur on
			 * the last read
			 */
			ctxt->next = NULL;
			goto out;
		}
		atomic_inc(&rdma_stat_read);

		/* Chunk needed more SGEs than one WR could carry; issue
		 * another WR for the remainder at the next offset. */
		if (read_wr.num_sge < ch_sge_ary[ch_no].count) {
			ch_sge_ary[ch_no].count -= read_wr.num_sge;
			ch_sge_ary[ch_no].start += read_wr.num_sge;
			goto next_sge;
		}
		sgl_offset = 0;
		err = 0;
	}

 out:
	svc_rdma_put_context(tmp_sge_ctxt, 0);
	svc_rdma_put_context(tmp_ch_ctxt, 0);

	/* Detach arg pages. svc_recv will replenish them */
	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
		rqstp->rq_pages[ch_no] = NULL;

	/*
	 * Detach res pages. svc_release must see rq_resused == 0 or it
	 * will attempt to put them.
	 */
	while (rqstp->rq_resused)
		rqstp->rq_respages[--rqstp->rq_resused] = NULL;

	if (err) {
		printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
		/* Free the linked list of read contexts */
		while (head != NULL) {
			ctxt = head->next;
			svc_rdma_put_context(head, 1);
			head = ctxt;
		}
		return 0;
	}

	return 1;
}
| 407 | |||
/*
 * Finish assembling a request whose read-list data arrived via
 * RDMA_READ.  @data is the header context; data->next is the head of
 * the circular context list built by rdma_read_xdr().  Rebuilds
 * rq_arg from the saved head context and frees the read contexts.
 * Returns the total length of the reassembled RPC message.
 */
static int rdma_read_complete(struct svc_rqst *rqstp,
			      struct svc_rdma_op_ctxt *data)
{
	struct svc_rdma_op_ctxt *head = data->next;
	int page_no;
	int ret;

	BUG_ON(!head);

	/* Copy RPC pages */
	for (page_no = 0; page_no < head->count; page_no++) {
		put_page(rqstp->rq_pages[page_no]);
		rqstp->rq_pages[page_no] = head->pages[page_no];
	}
	/* Point rq_arg.pages past header; head->sge[0].length holds the
	 * header page count saved by rdma_rcl_to_sge(). */
	rqstp->rq_arg.pages = &rqstp->rq_pages[head->sge[0].length];
	rqstp->rq_arg.page_len = head->arg.page_len;
	rqstp->rq_arg.page_base = head->arg.page_base;

	/* rq_respages starts after the last arg page */
	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
	rqstp->rq_resused = 0;

	/* Rebuild rq_arg head and tail. */
	rqstp->rq_arg.head[0] = head->arg.head[0];
	rqstp->rq_arg.tail[0] = head->arg.tail[0];
	rqstp->rq_arg.len = head->arg.len;
	rqstp->rq_arg.buflen = head->arg.buflen;

	/* XXX: What should this be? */
	rqstp->rq_prot = IPPROTO_MAX;

	/*
	 * Free the contexts we used to build the RDMA_READ. We have
	 * to be careful here because the context list uses the same
	 * next pointer used to chain the contexts associated with the
	 * RDMA_READ
	 */
	data->next = NULL;	/* terminate circular list */
	do {
		data = head->next;
		svc_rdma_put_context(head, 0);
		head = data;
	} while (head != NULL);

	ret = rqstp->rq_arg.head[0].iov_len
		+ rqstp->rq_arg.page_len
		+ rqstp->rq_arg.tail[0].iov_len;
	dprintk("svcrdma: deferred read ret=%d, rq_arg.len =%d, "
		"rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n",
		ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
		rqstp->rq_arg.head[0].iov_len);

	/* Indicate that we've consumed an RQ credit */
	rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
	svc_xprt_received(rqstp->rq_xprt);
	return ret;
}
| 466 | |||
| 467 | /* | ||
| 468 | * Set up the rqstp thread context to point to the RQ buffer. If | ||
| 469 | * necessary, pull additional data from the client with an RDMA_READ | ||
| 470 | * request. | ||
| 471 | */ | ||
| 472 | int svc_rdma_recvfrom(struct svc_rqst *rqstp) | ||
| 473 | { | ||
| 474 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
| 475 | struct svcxprt_rdma *rdma_xprt = | ||
| 476 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
| 477 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
| 478 | struct rpcrdma_msg *rmsgp; | ||
| 479 | int ret = 0; | ||
| 480 | int len; | ||
| 481 | |||
| 482 | dprintk("svcrdma: rqstp=%p\n", rqstp); | ||
| 483 | |||
| 484 | /* | ||
| 485 | * The rq_xprt_ctxt indicates if we've consumed an RQ credit | ||
| 486 | * or not. It is used in the rdma xpo_release_rqst function to | ||
| 487 | * determine whether or not to return an RQ WQE to the RQ. | ||
| 488 | */ | ||
| 489 | rqstp->rq_xprt_ctxt = NULL; | ||
| 490 | |||
| 491 | spin_lock_bh(&rdma_xprt->sc_read_complete_lock); | ||
| 492 | if (!list_empty(&rdma_xprt->sc_read_complete_q)) { | ||
| 493 | ctxt = list_entry(rdma_xprt->sc_read_complete_q.next, | ||
| 494 | struct svc_rdma_op_ctxt, | ||
| 495 | dto_q); | ||
| 496 | list_del_init(&ctxt->dto_q); | ||
| 497 | } | ||
| 498 | spin_unlock_bh(&rdma_xprt->sc_read_complete_lock); | ||
| 499 | if (ctxt) | ||
| 500 | return rdma_read_complete(rqstp, ctxt); | ||
| 501 | |||
| 502 | spin_lock_bh(&rdma_xprt->sc_rq_dto_lock); | ||
| 503 | if (!list_empty(&rdma_xprt->sc_rq_dto_q)) { | ||
| 504 | ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next, | ||
| 505 | struct svc_rdma_op_ctxt, | ||
| 506 | dto_q); | ||
| 507 | list_del_init(&ctxt->dto_q); | ||
| 508 | } else { | ||
| 509 | atomic_inc(&rdma_stat_rq_starve); | ||
| 510 | clear_bit(XPT_DATA, &xprt->xpt_flags); | ||
| 511 | ctxt = NULL; | ||
| 512 | } | ||
| 513 | spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock); | ||
| 514 | if (!ctxt) { | ||
| 515 | /* This is the EAGAIN path. The svc_recv routine will | ||
| 516 | * return -EAGAIN, the nfsd thread will go to call into | ||
| 517 | * svc_recv again and we shouldn't be on the active | ||
| 518 | * transport list | ||
| 519 | */ | ||
| 520 | if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) | ||
| 521 | goto close_out; | ||
| 522 | |||
| 523 | BUG_ON(ret); | ||
| 524 | goto out; | ||
| 525 | } | ||
| 526 | dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n", | ||
| 527 | ctxt, rdma_xprt, rqstp, ctxt->wc_status); | ||
| 528 | BUG_ON(ctxt->wc_status != IB_WC_SUCCESS); | ||
| 529 | atomic_inc(&rdma_stat_recv); | ||
| 530 | |||
| 531 | /* Build up the XDR from the receive buffers. */ | ||
| 532 | rdma_build_arg_xdr(rqstp, ctxt, ctxt->byte_len); | ||
| 533 | |||
| 534 | /* Decode the RDMA header. */ | ||
| 535 | len = svc_rdma_xdr_decode_req(&rmsgp, rqstp); | ||
| 536 | rqstp->rq_xprt_hlen = len; | ||
| 537 | |||
| 538 | /* If the request is invalid, reply with an error */ | ||
| 539 | if (len < 0) { | ||
| 540 | if (len == -ENOSYS) | ||
| 541 | (void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS); | ||
| 542 | goto close_out; | ||
| 543 | } | ||
| 544 | |||
| 545 | /* Read read-list data. If we would need to wait, defer | ||
| 546 | * it. Not that in this case, we don't return the RQ credit | ||
| 547 | * until after the read completes. | ||
| 548 | */ | ||
| 549 | if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) { | ||
| 550 | svc_xprt_received(xprt); | ||
| 551 | return 0; | ||
| 552 | } | ||
| 553 | |||
| 554 | /* Indicate we've consumed an RQ credit */ | ||
| 555 | rqstp->rq_xprt_ctxt = rqstp->rq_xprt; | ||
| 556 | |||
| 557 | ret = rqstp->rq_arg.head[0].iov_len | ||
| 558 | + rqstp->rq_arg.page_len | ||
| 559 | + rqstp->rq_arg.tail[0].iov_len; | ||
| 560 | svc_rdma_put_context(ctxt, 0); | ||
| 561 | out: | ||
| 562 | dprintk("svcrdma: ret = %d, rq_arg.len =%d, " | ||
| 563 | "rq_arg.head[0].iov_base=%p, rq_arg.head[0].iov_len = %zd\n", | ||
| 564 | ret, rqstp->rq_arg.len, | ||
| 565 | rqstp->rq_arg.head[0].iov_base, | ||
| 566 | rqstp->rq_arg.head[0].iov_len); | ||
| 567 | rqstp->rq_prot = IPPROTO_MAX; | ||
| 568 | svc_xprt_copy_addrs(rqstp, xprt); | ||
| 569 | svc_xprt_received(xprt); | ||
| 570 | return ret; | ||
| 571 | |||
| 572 | close_out: | ||
| 573 | if (ctxt) { | ||
| 574 | svc_rdma_put_context(ctxt, 1); | ||
| 575 | /* Indicate we've consumed an RQ credit */ | ||
| 576 | rqstp->rq_xprt_ctxt = rqstp->rq_xprt; | ||
| 577 | } | ||
| 578 | dprintk("svcrdma: transport %p is closing\n", xprt); | ||
| 579 | /* | ||
| 580 | * Set the close bit and enqueue it. svc_recv will see the | ||
| 581 | * close bit and call svc_xprt_delete | ||
| 582 | */ | ||
| 583 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
| 584 | svc_xprt_received(xprt); | ||
| 585 | return 0; | ||
| 586 | } | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c new file mode 100644 index 000000000000..3e321949e1dc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c | |||
| @@ -0,0 +1,520 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is available to you under a choice of one of two | ||
| 5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
| 6 | * General Public License (GPL) Version 2, available from the file | ||
| 7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
| 8 | * license below: | ||
| 9 | * | ||
| 10 | * Redistribution and use in source and binary forms, with or without | ||
| 11 | * modification, are permitted provided that the following conditions | ||
| 12 | * are met: | ||
| 13 | * | ||
| 14 | * Redistributions of source code must retain the above copyright | ||
| 15 | * notice, this list of conditions and the following disclaimer. | ||
| 16 | * | ||
| 17 | * Redistributions in binary form must reproduce the above | ||
| 18 | * copyright notice, this list of conditions and the following | ||
| 19 | * disclaimer in the documentation and/or other materials provided | ||
| 20 | * with the distribution. | ||
| 21 | * | ||
| 22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
| 23 | * its contributors may be used to endorse or promote products | ||
| 24 | * derived from this software without specific prior written | ||
| 25 | * permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | * | ||
| 39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
| 40 | */ | ||
| 41 | |||
| 42 | #include <linux/sunrpc/debug.h> | ||
| 43 | #include <linux/sunrpc/rpc_rdma.h> | ||
| 44 | #include <linux/spinlock.h> | ||
| 45 | #include <asm/unaligned.h> | ||
| 46 | #include <rdma/ib_verbs.h> | ||
| 47 | #include <rdma/rdma_cm.h> | ||
| 48 | #include <linux/sunrpc/svc_rdma.h> | ||
| 49 | |||
| 50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
| 51 | |||
| 52 | /* Encode an XDR as an array of IB SGE | ||
| 53 | * | ||
| 54 | * Assumptions: | ||
| 55 | * - head[0] is physically contiguous. | ||
| 56 | * - tail[0] is physically contiguous. | ||
| 57 | * - pages[] is not physically or virtually contigous and consists of | ||
| 58 | * PAGE_SIZE elements. | ||
| 59 | * | ||
| 60 | * Output: | ||
| 61 | * SGE[0] reserved for RCPRDMA header | ||
| 62 | * SGE[1] data from xdr->head[] | ||
| 63 | * SGE[2..sge_count-2] data from xdr->pages[] | ||
| 64 | * SGE[sge_count-1] data from xdr->tail. | ||
| 65 | * | ||
| 66 | */ | ||
| 67 | static struct ib_sge *xdr_to_sge(struct svcxprt_rdma *xprt, | ||
| 68 | struct xdr_buf *xdr, | ||
| 69 | struct ib_sge *sge, | ||
| 70 | int *sge_count) | ||
| 71 | { | ||
| 72 | /* Max we need is the length of the XDR / pagesize + one for | ||
| 73 | * head + one for tail + one for RPCRDMA header | ||
| 74 | */ | ||
| 75 | int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; | ||
| 76 | int sge_no; | ||
| 77 | u32 byte_count = xdr->len; | ||
| 78 | u32 sge_bytes; | ||
| 79 | u32 page_bytes; | ||
| 80 | int page_off; | ||
| 81 | int page_no; | ||
| 82 | |||
| 83 | /* Skip the first sge, this is for the RPCRDMA header */ | ||
| 84 | sge_no = 1; | ||
| 85 | |||
| 86 | /* Head SGE */ | ||
| 87 | sge[sge_no].addr = ib_dma_map_single(xprt->sc_cm_id->device, | ||
| 88 | xdr->head[0].iov_base, | ||
| 89 | xdr->head[0].iov_len, | ||
| 90 | DMA_TO_DEVICE); | ||
| 91 | sge_bytes = min_t(u32, byte_count, xdr->head[0].iov_len); | ||
| 92 | byte_count -= sge_bytes; | ||
| 93 | sge[sge_no].length = sge_bytes; | ||
| 94 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
| 95 | sge_no++; | ||
| 96 | |||
| 97 | /* pages SGE */ | ||
| 98 | page_no = 0; | ||
| 99 | page_bytes = xdr->page_len; | ||
| 100 | page_off = xdr->page_base; | ||
| 101 | while (byte_count && page_bytes) { | ||
| 102 | sge_bytes = min_t(u32, byte_count, (PAGE_SIZE-page_off)); | ||
| 103 | sge[sge_no].addr = | ||
| 104 | ib_dma_map_page(xprt->sc_cm_id->device, | ||
| 105 | xdr->pages[page_no], page_off, | ||
| 106 | sge_bytes, DMA_TO_DEVICE); | ||
| 107 | sge_bytes = min(sge_bytes, page_bytes); | ||
| 108 | byte_count -= sge_bytes; | ||
| 109 | page_bytes -= sge_bytes; | ||
| 110 | sge[sge_no].length = sge_bytes; | ||
| 111 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
| 112 | |||
| 113 | sge_no++; | ||
| 114 | page_no++; | ||
| 115 | page_off = 0; /* reset for next time through loop */ | ||
| 116 | } | ||
| 117 | |||
| 118 | /* Tail SGE */ | ||
| 119 | if (byte_count && xdr->tail[0].iov_len) { | ||
| 120 | sge[sge_no].addr = | ||
| 121 | ib_dma_map_single(xprt->sc_cm_id->device, | ||
| 122 | xdr->tail[0].iov_base, | ||
| 123 | xdr->tail[0].iov_len, | ||
| 124 | DMA_TO_DEVICE); | ||
| 125 | sge_bytes = min_t(u32, byte_count, xdr->tail[0].iov_len); | ||
| 126 | byte_count -= sge_bytes; | ||
| 127 | sge[sge_no].length = sge_bytes; | ||
| 128 | sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
| 129 | sge_no++; | ||
| 130 | } | ||
| 131 | |||
| 132 | BUG_ON(sge_no > sge_max); | ||
| 133 | BUG_ON(byte_count != 0); | ||
| 134 | |||
| 135 | *sge_count = sge_no; | ||
| 136 | return sge; | ||
| 137 | } | ||
| 138 | |||
| 139 | |||
| 140 | /* Assumptions: | ||
| 141 | * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE | ||
| 142 | */ | ||
| 143 | static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, | ||
| 144 | u32 rmr, u64 to, | ||
| 145 | u32 xdr_off, int write_len, | ||
| 146 | struct ib_sge *xdr_sge, int sge_count) | ||
| 147 | { | ||
| 148 | struct svc_rdma_op_ctxt *tmp_sge_ctxt; | ||
| 149 | struct ib_send_wr write_wr; | ||
| 150 | struct ib_sge *sge; | ||
| 151 | int xdr_sge_no; | ||
| 152 | int sge_no; | ||
| 153 | int sge_bytes; | ||
| 154 | int sge_off; | ||
| 155 | int bc; | ||
| 156 | struct svc_rdma_op_ctxt *ctxt; | ||
| 157 | int ret = 0; | ||
| 158 | |||
| 159 | BUG_ON(sge_count >= 32); | ||
| 160 | dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " | ||
| 161 | "write_len=%d, xdr_sge=%p, sge_count=%d\n", | ||
| 162 | rmr, to, xdr_off, write_len, xdr_sge, sge_count); | ||
| 163 | |||
| 164 | ctxt = svc_rdma_get_context(xprt); | ||
| 165 | ctxt->count = 0; | ||
| 166 | tmp_sge_ctxt = svc_rdma_get_context(xprt); | ||
| 167 | sge = tmp_sge_ctxt->sge; | ||
| 168 | |||
| 169 | /* Find the SGE associated with xdr_off */ | ||
| 170 | for (bc = xdr_off, xdr_sge_no = 1; bc && xdr_sge_no < sge_count; | ||
| 171 | xdr_sge_no++) { | ||
| 172 | if (xdr_sge[xdr_sge_no].length > bc) | ||
| 173 | break; | ||
| 174 | bc -= xdr_sge[xdr_sge_no].length; | ||
| 175 | } | ||
| 176 | |||
| 177 | sge_off = bc; | ||
| 178 | bc = write_len; | ||
| 179 | sge_no = 0; | ||
| 180 | |||
| 181 | /* Copy the remaining SGE */ | ||
| 182 | while (bc != 0 && xdr_sge_no < sge_count) { | ||
| 183 | sge[sge_no].addr = xdr_sge[xdr_sge_no].addr + sge_off; | ||
| 184 | sge[sge_no].lkey = xdr_sge[xdr_sge_no].lkey; | ||
| 185 | sge_bytes = min((size_t)bc, | ||
| 186 | (size_t)(xdr_sge[xdr_sge_no].length-sge_off)); | ||
| 187 | sge[sge_no].length = sge_bytes; | ||
| 188 | |||
| 189 | sge_off = 0; | ||
| 190 | sge_no++; | ||
| 191 | xdr_sge_no++; | ||
| 192 | bc -= sge_bytes; | ||
| 193 | } | ||
| 194 | |||
| 195 | BUG_ON(bc != 0); | ||
| 196 | BUG_ON(xdr_sge_no > sge_count); | ||
| 197 | |||
| 198 | /* Prepare WRITE WR */ | ||
| 199 | memset(&write_wr, 0, sizeof write_wr); | ||
| 200 | ctxt->wr_op = IB_WR_RDMA_WRITE; | ||
| 201 | write_wr.wr_id = (unsigned long)ctxt; | ||
| 202 | write_wr.sg_list = &sge[0]; | ||
| 203 | write_wr.num_sge = sge_no; | ||
| 204 | write_wr.opcode = IB_WR_RDMA_WRITE; | ||
| 205 | write_wr.send_flags = IB_SEND_SIGNALED; | ||
| 206 | write_wr.wr.rdma.rkey = rmr; | ||
| 207 | write_wr.wr.rdma.remote_addr = to; | ||
| 208 | |||
| 209 | /* Post It */ | ||
| 210 | atomic_inc(&rdma_stat_write); | ||
| 211 | if (svc_rdma_send(xprt, &write_wr)) { | ||
| 212 | svc_rdma_put_context(ctxt, 1); | ||
| 213 | /* Fatal error, close transport */ | ||
| 214 | ret = -EIO; | ||
| 215 | } | ||
| 216 | svc_rdma_put_context(tmp_sge_ctxt, 0); | ||
| 217 | return ret; | ||
| 218 | } | ||
| 219 | |||
| 220 | static int send_write_chunks(struct svcxprt_rdma *xprt, | ||
| 221 | struct rpcrdma_msg *rdma_argp, | ||
| 222 | struct rpcrdma_msg *rdma_resp, | ||
| 223 | struct svc_rqst *rqstp, | ||
| 224 | struct ib_sge *sge, | ||
| 225 | int sge_count) | ||
| 226 | { | ||
| 227 | u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; | ||
| 228 | int write_len; | ||
| 229 | int max_write; | ||
| 230 | u32 xdr_off; | ||
| 231 | int chunk_off; | ||
| 232 | int chunk_no; | ||
| 233 | struct rpcrdma_write_array *arg_ary; | ||
| 234 | struct rpcrdma_write_array *res_ary; | ||
| 235 | int ret; | ||
| 236 | |||
| 237 | arg_ary = svc_rdma_get_write_array(rdma_argp); | ||
| 238 | if (!arg_ary) | ||
| 239 | return 0; | ||
| 240 | res_ary = (struct rpcrdma_write_array *) | ||
| 241 | &rdma_resp->rm_body.rm_chunks[1]; | ||
| 242 | |||
| 243 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
| 244 | |||
| 245 | /* Write chunks start at the pagelist */ | ||
| 246 | for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; | ||
| 247 | xfer_len && chunk_no < arg_ary->wc_nchunks; | ||
| 248 | chunk_no++) { | ||
| 249 | struct rpcrdma_segment *arg_ch; | ||
| 250 | u64 rs_offset; | ||
| 251 | |||
| 252 | arg_ch = &arg_ary->wc_array[chunk_no].wc_target; | ||
| 253 | write_len = min(xfer_len, arg_ch->rs_length); | ||
| 254 | |||
| 255 | /* Prepare the response chunk given the length actually | ||
| 256 | * written */ | ||
| 257 | rs_offset = get_unaligned(&(arg_ch->rs_offset)); | ||
| 258 | svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, | ||
| 259 | arg_ch->rs_handle, | ||
| 260 | rs_offset, | ||
| 261 | write_len); | ||
| 262 | chunk_off = 0; | ||
| 263 | while (write_len) { | ||
| 264 | int this_write; | ||
| 265 | this_write = min(write_len, max_write); | ||
| 266 | ret = send_write(xprt, rqstp, | ||
| 267 | arg_ch->rs_handle, | ||
| 268 | rs_offset + chunk_off, | ||
| 269 | xdr_off, | ||
| 270 | this_write, | ||
| 271 | sge, | ||
| 272 | sge_count); | ||
| 273 | if (ret) { | ||
| 274 | dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", | ||
| 275 | ret); | ||
| 276 | return -EIO; | ||
| 277 | } | ||
| 278 | chunk_off += this_write; | ||
| 279 | xdr_off += this_write; | ||
| 280 | xfer_len -= this_write; | ||
| 281 | write_len -= this_write; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | /* Update the req with the number of chunks actually used */ | ||
| 285 | svc_rdma_xdr_encode_write_list(rdma_resp, chunk_no); | ||
| 286 | |||
| 287 | return rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; | ||
| 288 | } | ||
| 289 | |||
| 290 | static int send_reply_chunks(struct svcxprt_rdma *xprt, | ||
| 291 | struct rpcrdma_msg *rdma_argp, | ||
| 292 | struct rpcrdma_msg *rdma_resp, | ||
| 293 | struct svc_rqst *rqstp, | ||
| 294 | struct ib_sge *sge, | ||
| 295 | int sge_count) | ||
| 296 | { | ||
| 297 | u32 xfer_len = rqstp->rq_res.len; | ||
| 298 | int write_len; | ||
| 299 | int max_write; | ||
| 300 | u32 xdr_off; | ||
| 301 | int chunk_no; | ||
| 302 | int chunk_off; | ||
| 303 | struct rpcrdma_segment *ch; | ||
| 304 | struct rpcrdma_write_array *arg_ary; | ||
| 305 | struct rpcrdma_write_array *res_ary; | ||
| 306 | int ret; | ||
| 307 | |||
| 308 | arg_ary = svc_rdma_get_reply_array(rdma_argp); | ||
| 309 | if (!arg_ary) | ||
| 310 | return 0; | ||
| 311 | /* XXX: need to fix when reply lists occur with read-list and or | ||
| 312 | * write-list */ | ||
| 313 | res_ary = (struct rpcrdma_write_array *) | ||
| 314 | &rdma_resp->rm_body.rm_chunks[2]; | ||
| 315 | |||
| 316 | max_write = xprt->sc_max_sge * PAGE_SIZE; | ||
| 317 | |||
| 318 | /* xdr offset starts at RPC message */ | ||
| 319 | for (xdr_off = 0, chunk_no = 0; | ||
| 320 | xfer_len && chunk_no < arg_ary->wc_nchunks; | ||
| 321 | chunk_no++) { | ||
| 322 | u64 rs_offset; | ||
| 323 | ch = &arg_ary->wc_array[chunk_no].wc_target; | ||
| 324 | write_len = min(xfer_len, ch->rs_length); | ||
| 325 | |||
| 326 | |||
| 327 | /* Prepare the reply chunk given the length actually | ||
| 328 | * written */ | ||
| 329 | rs_offset = get_unaligned(&(ch->rs_offset)); | ||
| 330 | svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, | ||
| 331 | ch->rs_handle, rs_offset, | ||
| 332 | write_len); | ||
| 333 | chunk_off = 0; | ||
| 334 | while (write_len) { | ||
| 335 | int this_write; | ||
| 336 | |||
| 337 | this_write = min(write_len, max_write); | ||
| 338 | ret = send_write(xprt, rqstp, | ||
| 339 | ch->rs_handle, | ||
| 340 | rs_offset + chunk_off, | ||
| 341 | xdr_off, | ||
| 342 | this_write, | ||
| 343 | sge, | ||
| 344 | sge_count); | ||
| 345 | if (ret) { | ||
| 346 | dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", | ||
| 347 | ret); | ||
| 348 | return -EIO; | ||
| 349 | } | ||
| 350 | chunk_off += this_write; | ||
| 351 | xdr_off += this_write; | ||
| 352 | xfer_len -= this_write; | ||
| 353 | write_len -= this_write; | ||
| 354 | } | ||
| 355 | } | ||
| 356 | /* Update the req with the number of chunks actually used */ | ||
| 357 | svc_rdma_xdr_encode_reply_array(res_ary, chunk_no); | ||
| 358 | |||
| 359 | return rqstp->rq_res.len; | ||
| 360 | } | ||
| 361 | |||
| 362 | /* This function prepares the portion of the RPCRDMA message to be | ||
| 363 | * sent in the RDMA_SEND. This function is called after data sent via | ||
| 364 | * RDMA has already been transmitted. There are three cases: | ||
| 365 | * - The RPCRDMA header, RPC header, and payload are all sent in a | ||
| 366 | * single RDMA_SEND. This is the "inline" case. | ||
| 367 | * - The RPCRDMA header and some portion of the RPC header and data | ||
| 368 | * are sent via this RDMA_SEND and another portion of the data is | ||
| 369 | * sent via RDMA. | ||
| 370 | * - The RPCRDMA header [NOMSG] is sent in this RDMA_SEND and the RPC | ||
| 371 | * header and data are all transmitted via RDMA. | ||
| 372 | * In all three cases, this function prepares the RPCRDMA header in | ||
| 373 | * sge[0], the 'type' parameter indicates the type to place in the | ||
| 374 | * RPCRDMA header, and the 'byte_count' field indicates how much of | ||
| 375 | * the XDR to include in this RDMA_SEND. | ||
| 376 | */ | ||
| 377 | static int send_reply(struct svcxprt_rdma *rdma, | ||
| 378 | struct svc_rqst *rqstp, | ||
| 379 | struct page *page, | ||
| 380 | struct rpcrdma_msg *rdma_resp, | ||
| 381 | struct svc_rdma_op_ctxt *ctxt, | ||
| 382 | int sge_count, | ||
| 383 | int byte_count) | ||
| 384 | { | ||
| 385 | struct ib_send_wr send_wr; | ||
| 386 | int sge_no; | ||
| 387 | int sge_bytes; | ||
| 388 | int page_no; | ||
| 389 | int ret; | ||
| 390 | |||
| 391 | /* Prepare the context */ | ||
| 392 | ctxt->pages[0] = page; | ||
| 393 | ctxt->count = 1; | ||
| 394 | |||
| 395 | /* Prepare the SGE for the RPCRDMA Header */ | ||
| 396 | ctxt->sge[0].addr = | ||
| 397 | ib_dma_map_page(rdma->sc_cm_id->device, | ||
| 398 | page, 0, PAGE_SIZE, DMA_TO_DEVICE); | ||
| 399 | ctxt->direction = DMA_TO_DEVICE; | ||
| 400 | ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); | ||
| 401 | ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; | ||
| 402 | |||
| 403 | /* Determine how many of our SGE are to be transmitted */ | ||
| 404 | for (sge_no = 1; byte_count && sge_no < sge_count; sge_no++) { | ||
| 405 | sge_bytes = min((size_t)ctxt->sge[sge_no].length, | ||
| 406 | (size_t)byte_count); | ||
| 407 | byte_count -= sge_bytes; | ||
| 408 | } | ||
| 409 | BUG_ON(byte_count != 0); | ||
| 410 | |||
| 411 | /* Save all respages in the ctxt and remove them from the | ||
| 412 | * respages array. They are our pages until the I/O | ||
| 413 | * completes. | ||
| 414 | */ | ||
| 415 | for (page_no = 0; page_no < rqstp->rq_resused; page_no++) { | ||
| 416 | ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; | ||
| 417 | ctxt->count++; | ||
| 418 | rqstp->rq_respages[page_no] = NULL; | ||
| 419 | } | ||
| 420 | |||
| 421 | BUG_ON(sge_no > rdma->sc_max_sge); | ||
| 422 | memset(&send_wr, 0, sizeof send_wr); | ||
| 423 | ctxt->wr_op = IB_WR_SEND; | ||
| 424 | send_wr.wr_id = (unsigned long)ctxt; | ||
| 425 | send_wr.sg_list = ctxt->sge; | ||
| 426 | send_wr.num_sge = sge_no; | ||
| 427 | send_wr.opcode = IB_WR_SEND; | ||
| 428 | send_wr.send_flags = IB_SEND_SIGNALED; | ||
| 429 | |||
| 430 | ret = svc_rdma_send(rdma, &send_wr); | ||
| 431 | if (ret) | ||
| 432 | svc_rdma_put_context(ctxt, 1); | ||
| 433 | |||
| 434 | return ret; | ||
| 435 | } | ||
| 436 | |||
| 437 | void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) | ||
| 438 | { | ||
| 439 | } | ||
| 440 | |||
| 441 | /* | ||
| 442 | * Return the start of an xdr buffer. | ||
| 443 | */ | ||
| 444 | static void *xdr_start(struct xdr_buf *xdr) | ||
| 445 | { | ||
| 446 | return xdr->head[0].iov_base - | ||
| 447 | (xdr->len - | ||
| 448 | xdr->page_len - | ||
| 449 | xdr->tail[0].iov_len - | ||
| 450 | xdr->head[0].iov_len); | ||
| 451 | } | ||
| 452 | |||
| 453 | int svc_rdma_sendto(struct svc_rqst *rqstp) | ||
| 454 | { | ||
| 455 | struct svc_xprt *xprt = rqstp->rq_xprt; | ||
| 456 | struct svcxprt_rdma *rdma = | ||
| 457 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
| 458 | struct rpcrdma_msg *rdma_argp; | ||
| 459 | struct rpcrdma_msg *rdma_resp; | ||
| 460 | struct rpcrdma_write_array *reply_ary; | ||
| 461 | enum rpcrdma_proc reply_type; | ||
| 462 | int ret; | ||
| 463 | int inline_bytes; | ||
| 464 | struct ib_sge *sge; | ||
| 465 | int sge_count = 0; | ||
| 466 | struct page *res_page; | ||
| 467 | struct svc_rdma_op_ctxt *ctxt; | ||
| 468 | |||
| 469 | dprintk("svcrdma: sending response for rqstp=%p\n", rqstp); | ||
| 470 | |||
| 471 | /* Get the RDMA request header. */ | ||
| 472 | rdma_argp = xdr_start(&rqstp->rq_arg); | ||
| 473 | |||
| 474 | /* Build an SGE for the XDR */ | ||
| 475 | ctxt = svc_rdma_get_context(rdma); | ||
| 476 | ctxt->direction = DMA_TO_DEVICE; | ||
| 477 | sge = xdr_to_sge(rdma, &rqstp->rq_res, ctxt->sge, &sge_count); | ||
| 478 | |||
| 479 | inline_bytes = rqstp->rq_res.len; | ||
| 480 | |||
| 481 | /* Create the RDMA response header */ | ||
| 482 | res_page = svc_rdma_get_page(); | ||
| 483 | rdma_resp = page_address(res_page); | ||
| 484 | reply_ary = svc_rdma_get_reply_array(rdma_argp); | ||
| 485 | if (reply_ary) | ||
| 486 | reply_type = RDMA_NOMSG; | ||
| 487 | else | ||
| 488 | reply_type = RDMA_MSG; | ||
| 489 | svc_rdma_xdr_encode_reply_header(rdma, rdma_argp, | ||
| 490 | rdma_resp, reply_type); | ||
| 491 | |||
| 492 | /* Send any write-chunk data and build resp write-list */ | ||
| 493 | ret = send_write_chunks(rdma, rdma_argp, rdma_resp, | ||
| 494 | rqstp, sge, sge_count); | ||
| 495 | if (ret < 0) { | ||
| 496 | printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", | ||
| 497 | ret); | ||
| 498 | goto error; | ||
| 499 | } | ||
| 500 | inline_bytes -= ret; | ||
| 501 | |||
| 502 | /* Send any reply-list data and update resp reply-list */ | ||
| 503 | ret = send_reply_chunks(rdma, rdma_argp, rdma_resp, | ||
| 504 | rqstp, sge, sge_count); | ||
| 505 | if (ret < 0) { | ||
| 506 | printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", | ||
| 507 | ret); | ||
| 508 | goto error; | ||
| 509 | } | ||
| 510 | inline_bytes -= ret; | ||
| 511 | |||
| 512 | ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, sge_count, | ||
| 513 | inline_bytes); | ||
| 514 | dprintk("svcrdma: send_reply returns %d\n", ret); | ||
| 515 | return ret; | ||
| 516 | error: | ||
| 517 | svc_rdma_put_context(ctxt, 0); | ||
| 518 | put_page(res_page); | ||
| 519 | return ret; | ||
| 520 | } | ||
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c new file mode 100644 index 000000000000..f09444c451bc --- /dev/null +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c | |||
| @@ -0,0 +1,1080 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved. | ||
| 3 | * | ||
| 4 | * This software is available to you under a choice of one of two | ||
| 5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
| 6 | * General Public License (GPL) Version 2, available from the file | ||
| 7 | * COPYING in the main directory of this source tree, or the BSD-type | ||
| 8 | * license below: | ||
| 9 | * | ||
| 10 | * Redistribution and use in source and binary forms, with or without | ||
| 11 | * modification, are permitted provided that the following conditions | ||
| 12 | * are met: | ||
| 13 | * | ||
| 14 | * Redistributions of source code must retain the above copyright | ||
| 15 | * notice, this list of conditions and the following disclaimer. | ||
| 16 | * | ||
| 17 | * Redistributions in binary form must reproduce the above | ||
| 18 | * copyright notice, this list of conditions and the following | ||
| 19 | * disclaimer in the documentation and/or other materials provided | ||
| 20 | * with the distribution. | ||
| 21 | * | ||
| 22 | * Neither the name of the Network Appliance, Inc. nor the names of | ||
| 23 | * its contributors may be used to endorse or promote products | ||
| 24 | * derived from this software without specific prior written | ||
| 25 | * permission. | ||
| 26 | * | ||
| 27 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
| 28 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
| 29 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
| 30 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
| 31 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
| 32 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
| 33 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
| 34 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
| 35 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| 36 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
| 37 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| 38 | * | ||
| 39 | * Author: Tom Tucker <tom@opengridcomputing.com> | ||
| 40 | */ | ||
| 41 | |||
| 42 | #include <linux/sunrpc/svc_xprt.h> | ||
| 43 | #include <linux/sunrpc/debug.h> | ||
| 44 | #include <linux/sunrpc/rpc_rdma.h> | ||
| 45 | #include <linux/spinlock.h> | ||
| 46 | #include <rdma/ib_verbs.h> | ||
| 47 | #include <rdma/rdma_cm.h> | ||
| 48 | #include <linux/sunrpc/svc_rdma.h> | ||
| 49 | |||
| 50 | #define RPCDBG_FACILITY RPCDBG_SVCXPRT | ||
| 51 | |||
| 52 | static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | ||
| 53 | struct sockaddr *sa, int salen, | ||
| 54 | int flags); | ||
| 55 | static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt); | ||
| 56 | static void svc_rdma_release_rqst(struct svc_rqst *); | ||
| 57 | static void rdma_destroy_xprt(struct svcxprt_rdma *xprt); | ||
| 58 | static void dto_tasklet_func(unsigned long data); | ||
| 59 | static void svc_rdma_detach(struct svc_xprt *xprt); | ||
| 60 | static void svc_rdma_free(struct svc_xprt *xprt); | ||
| 61 | static int svc_rdma_has_wspace(struct svc_xprt *xprt); | ||
| 62 | static void rq_cq_reap(struct svcxprt_rdma *xprt); | ||
| 63 | static void sq_cq_reap(struct svcxprt_rdma *xprt); | ||
| 64 | |||
| 65 | DECLARE_TASKLET(dto_tasklet, dto_tasklet_func, 0UL); | ||
| 66 | static DEFINE_SPINLOCK(dto_lock); | ||
| 67 | static LIST_HEAD(dto_xprt_q); | ||
| 68 | |||
| 69 | static struct svc_xprt_ops svc_rdma_ops = { | ||
| 70 | .xpo_create = svc_rdma_create, | ||
| 71 | .xpo_recvfrom = svc_rdma_recvfrom, | ||
| 72 | .xpo_sendto = svc_rdma_sendto, | ||
| 73 | .xpo_release_rqst = svc_rdma_release_rqst, | ||
| 74 | .xpo_detach = svc_rdma_detach, | ||
| 75 | .xpo_free = svc_rdma_free, | ||
| 76 | .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, | ||
| 77 | .xpo_has_wspace = svc_rdma_has_wspace, | ||
| 78 | .xpo_accept = svc_rdma_accept, | ||
| 79 | }; | ||
| 80 | |||
| 81 | struct svc_xprt_class svc_rdma_class = { | ||
| 82 | .xcl_name = "rdma", | ||
| 83 | .xcl_owner = THIS_MODULE, | ||
| 84 | .xcl_ops = &svc_rdma_ops, | ||
| 85 | .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, | ||
| 86 | }; | ||
| 87 | |||
| 88 | static int rdma_bump_context_cache(struct svcxprt_rdma *xprt) | ||
| 89 | { | ||
| 90 | int target; | ||
| 91 | int at_least_one = 0; | ||
| 92 | struct svc_rdma_op_ctxt *ctxt; | ||
| 93 | |||
| 94 | target = min(xprt->sc_ctxt_cnt + xprt->sc_ctxt_bump, | ||
| 95 | xprt->sc_ctxt_max); | ||
| 96 | |||
| 97 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
| 98 | while (xprt->sc_ctxt_cnt < target) { | ||
| 99 | xprt->sc_ctxt_cnt++; | ||
| 100 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
| 101 | |||
| 102 | ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); | ||
| 103 | |||
| 104 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
| 105 | if (ctxt) { | ||
| 106 | at_least_one = 1; | ||
| 107 | ctxt->next = xprt->sc_ctxt_head; | ||
| 108 | xprt->sc_ctxt_head = ctxt; | ||
| 109 | } else { | ||
| 110 | /* kmalloc failed...give up for now */ | ||
| 111 | xprt->sc_ctxt_cnt--; | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | } | ||
| 115 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
| 116 | dprintk("svcrdma: sc_ctxt_max=%d, sc_ctxt_cnt=%d\n", | ||
| 117 | xprt->sc_ctxt_max, xprt->sc_ctxt_cnt); | ||
| 118 | return at_least_one; | ||
| 119 | } | ||
| 120 | |||
| 121 | struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) | ||
| 122 | { | ||
| 123 | struct svc_rdma_op_ctxt *ctxt; | ||
| 124 | |||
| 125 | while (1) { | ||
| 126 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
| 127 | if (unlikely(xprt->sc_ctxt_head == NULL)) { | ||
| 128 | /* Try to bump my cache. */ | ||
| 129 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
| 130 | |||
| 131 | if (rdma_bump_context_cache(xprt)) | ||
| 132 | continue; | ||
| 133 | |||
| 134 | printk(KERN_INFO "svcrdma: sleeping waiting for " | ||
| 135 | "context memory on xprt=%p\n", | ||
| 136 | xprt); | ||
| 137 | schedule_timeout_uninterruptible(msecs_to_jiffies(500)); | ||
| 138 | continue; | ||
| 139 | } | ||
| 140 | ctxt = xprt->sc_ctxt_head; | ||
| 141 | xprt->sc_ctxt_head = ctxt->next; | ||
| 142 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
| 143 | ctxt->xprt = xprt; | ||
| 144 | INIT_LIST_HEAD(&ctxt->dto_q); | ||
| 145 | ctxt->count = 0; | ||
| 146 | break; | ||
| 147 | } | ||
| 148 | return ctxt; | ||
| 149 | } | ||
| 150 | |||
| 151 | void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) | ||
| 152 | { | ||
| 153 | struct svcxprt_rdma *xprt; | ||
| 154 | int i; | ||
| 155 | |||
| 156 | BUG_ON(!ctxt); | ||
| 157 | xprt = ctxt->xprt; | ||
| 158 | if (free_pages) | ||
| 159 | for (i = 0; i < ctxt->count; i++) | ||
| 160 | put_page(ctxt->pages[i]); | ||
| 161 | |||
| 162 | for (i = 0; i < ctxt->count; i++) | ||
| 163 | dma_unmap_single(xprt->sc_cm_id->device->dma_device, | ||
| 164 | ctxt->sge[i].addr, | ||
| 165 | ctxt->sge[i].length, | ||
| 166 | ctxt->direction); | ||
| 167 | spin_lock_bh(&xprt->sc_ctxt_lock); | ||
| 168 | ctxt->next = xprt->sc_ctxt_head; | ||
| 169 | xprt->sc_ctxt_head = ctxt; | ||
| 170 | spin_unlock_bh(&xprt->sc_ctxt_lock); | ||
| 171 | } | ||
| 172 | |||
| 173 | /* ib_cq event handler */ | ||
| 174 | static void cq_event_handler(struct ib_event *event, void *context) | ||
| 175 | { | ||
| 176 | struct svc_xprt *xprt = context; | ||
| 177 | dprintk("svcrdma: received CQ event id=%d, context=%p\n", | ||
| 178 | event->event, context); | ||
| 179 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
| 180 | } | ||
| 181 | |||
| 182 | /* QP event handler */ | ||
| 183 | static void qp_event_handler(struct ib_event *event, void *context) | ||
| 184 | { | ||
| 185 | struct svc_xprt *xprt = context; | ||
| 186 | |||
| 187 | switch (event->event) { | ||
| 188 | /* These are considered benign events */ | ||
| 189 | case IB_EVENT_PATH_MIG: | ||
| 190 | case IB_EVENT_COMM_EST: | ||
| 191 | case IB_EVENT_SQ_DRAINED: | ||
| 192 | case IB_EVENT_QP_LAST_WQE_REACHED: | ||
| 193 | dprintk("svcrdma: QP event %d received for QP=%p\n", | ||
| 194 | event->event, event->element.qp); | ||
| 195 | break; | ||
| 196 | /* These are considered fatal events */ | ||
| 197 | case IB_EVENT_PATH_MIG_ERR: | ||
| 198 | case IB_EVENT_QP_FATAL: | ||
| 199 | case IB_EVENT_QP_REQ_ERR: | ||
| 200 | case IB_EVENT_QP_ACCESS_ERR: | ||
| 201 | case IB_EVENT_DEVICE_FATAL: | ||
| 202 | default: | ||
| 203 | dprintk("svcrdma: QP ERROR event %d received for QP=%p, " | ||
| 204 | "closing transport\n", | ||
| 205 | event->event, event->element.qp); | ||
| 206 | set_bit(XPT_CLOSE, &xprt->xpt_flags); | ||
| 207 | break; | ||
| 208 | } | ||
| 209 | } | ||
| 210 | |||
| 211 | /* | ||
| 212 | * Data Transfer Operation Tasklet | ||
| 213 | * | ||
| 214 | * Walks a list of transports with I/O pending, removing entries as | ||
| 215 | * they are added to the server's I/O pending list. Two bits indicate | ||
| 216 | * if SQ, RQ, or both have I/O pending. The dto_lock is an irqsave | ||
| 217 | * spinlock that serializes access to the transport list with the RQ | ||
| 218 | * and SQ interrupt handlers. | ||
| 219 | */ | ||
| 220 | static void dto_tasklet_func(unsigned long data) | ||
| 221 | { | ||
| 222 | struct svcxprt_rdma *xprt; | ||
| 223 | unsigned long flags; | ||
| 224 | |||
| 225 | spin_lock_irqsave(&dto_lock, flags); | ||
| 226 | while (!list_empty(&dto_xprt_q)) { | ||
| 227 | xprt = list_entry(dto_xprt_q.next, | ||
| 228 | struct svcxprt_rdma, sc_dto_q); | ||
| 229 | list_del_init(&xprt->sc_dto_q); | ||
| 230 | spin_unlock_irqrestore(&dto_lock, flags); | ||
| 231 | |||
| 232 | if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) { | ||
| 233 | ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP); | ||
| 234 | rq_cq_reap(xprt); | ||
| 235 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
| 236 | /* | ||
| 237 | * If data arrived before established event, | ||
| 238 | * don't enqueue. This defers RPC I/O until the | ||
| 239 | * RDMA connection is complete. | ||
| 240 | */ | ||
| 241 | if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) | ||
| 242 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
| 243 | } | ||
| 244 | |||
| 245 | if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) { | ||
| 246 | ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); | ||
| 247 | sq_cq_reap(xprt); | ||
| 248 | } | ||
| 249 | |||
| 250 | spin_lock_irqsave(&dto_lock, flags); | ||
| 251 | } | ||
| 252 | spin_unlock_irqrestore(&dto_lock, flags); | ||
| 253 | } | ||
| 254 | |||
| 255 | /* | ||
| 256 | * Receive Queue Completion Handler | ||
| 257 | * | ||
| 258 | * Since an RQ completion handler is called on interrupt context, we | ||
| 259 | * need to defer the handling of the I/O to a tasklet | ||
| 260 | */ | ||
| 261 | static void rq_comp_handler(struct ib_cq *cq, void *cq_context) | ||
| 262 | { | ||
| 263 | struct svcxprt_rdma *xprt = cq_context; | ||
| 264 | unsigned long flags; | ||
| 265 | |||
| 266 | /* | ||
| 267 | * Set the bit regardless of whether or not it's on the list | ||
| 268 | * because it may be on the list already due to an SQ | ||
| 269 | * completion. | ||
| 270 | */ | ||
| 271 | set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags); | ||
| 272 | |||
| 273 | /* | ||
| 274 | * If this transport is not already on the DTO transport queue, | ||
| 275 | * add it | ||
| 276 | */ | ||
| 277 | spin_lock_irqsave(&dto_lock, flags); | ||
| 278 | if (list_empty(&xprt->sc_dto_q)) | ||
| 279 | list_add_tail(&xprt->sc_dto_q, &dto_xprt_q); | ||
| 280 | spin_unlock_irqrestore(&dto_lock, flags); | ||
| 281 | |||
| 282 | /* Tasklet does all the work to avoid irqsave locks. */ | ||
| 283 | tasklet_schedule(&dto_tasklet); | ||
| 284 | } | ||
| 285 | |||
| 286 | /* | ||
| 287 | * rq_cq_reap - Process the RQ CQ. | ||
| 288 | * | ||
| 289 | * Take all completing WC off the CQE and enqueue the associated DTO | ||
| 290 | * context on the dto_q for the transport. | ||
| 291 | */ | ||
| 292 | static void rq_cq_reap(struct svcxprt_rdma *xprt) | ||
| 293 | { | ||
| 294 | int ret; | ||
| 295 | struct ib_wc wc; | ||
| 296 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
| 297 | |||
| 298 | atomic_inc(&rdma_stat_rq_poll); | ||
| 299 | |||
| 300 | spin_lock_bh(&xprt->sc_rq_dto_lock); | ||
| 301 | while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) { | ||
| 302 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
| 303 | ctxt->wc_status = wc.status; | ||
| 304 | ctxt->byte_len = wc.byte_len; | ||
| 305 | if (wc.status != IB_WC_SUCCESS) { | ||
| 306 | /* Close the transport */ | ||
| 307 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
| 308 | svc_rdma_put_context(ctxt, 1); | ||
| 309 | continue; | ||
| 310 | } | ||
| 311 | list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q); | ||
| 312 | } | ||
| 313 | spin_unlock_bh(&xprt->sc_rq_dto_lock); | ||
| 314 | |||
| 315 | if (ctxt) | ||
| 316 | atomic_inc(&rdma_stat_rq_prod); | ||
| 317 | } | ||
| 318 | |||
| 319 | /* | ||
| 320 | * Send Queue Completion Handler - potentially called on interrupt context. | ||
| 321 | */ | ||
| 322 | static void sq_cq_reap(struct svcxprt_rdma *xprt) | ||
| 323 | { | ||
| 324 | struct svc_rdma_op_ctxt *ctxt = NULL; | ||
| 325 | struct ib_wc wc; | ||
| 326 | struct ib_cq *cq = xprt->sc_sq_cq; | ||
| 327 | int ret; | ||
| 328 | |||
| 329 | atomic_inc(&rdma_stat_sq_poll); | ||
| 330 | while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { | ||
| 331 | ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; | ||
| 332 | xprt = ctxt->xprt; | ||
| 333 | |||
| 334 | if (wc.status != IB_WC_SUCCESS) | ||
| 335 | /* Close the transport */ | ||
| 336 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
| 337 | |||
| 338 | /* Decrement used SQ WR count */ | ||
| 339 | atomic_dec(&xprt->sc_sq_count); | ||
| 340 | wake_up(&xprt->sc_send_wait); | ||
| 341 | |||
| 342 | switch (ctxt->wr_op) { | ||
| 343 | case IB_WR_SEND: | ||
| 344 | case IB_WR_RDMA_WRITE: | ||
| 345 | svc_rdma_put_context(ctxt, 1); | ||
| 346 | break; | ||
| 347 | |||
| 348 | case IB_WR_RDMA_READ: | ||
| 349 | if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { | ||
| 350 | set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); | ||
| 351 | set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags); | ||
| 352 | spin_lock_bh(&xprt->sc_read_complete_lock); | ||
| 353 | list_add_tail(&ctxt->dto_q, | ||
| 354 | &xprt->sc_read_complete_q); | ||
| 355 | spin_unlock_bh(&xprt->sc_read_complete_lock); | ||
| 356 | svc_xprt_enqueue(&xprt->sc_xprt); | ||
| 357 | } | ||
| 358 | break; | ||
| 359 | |||
| 360 | default: | ||
| 361 | printk(KERN_ERR "svcrdma: unexpected completion type, " | ||
| 362 | "opcode=%d, status=%d\n", | ||
| 363 | wc.opcode, wc.status); | ||
| 364 | break; | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | if (ctxt) | ||
| 369 | atomic_inc(&rdma_stat_sq_prod); | ||
| 370 | } | ||
| 371 | |||
/*
 * Send Queue Completion Handler — interrupt context. Mirrors
 * rq_comp_handler(): mark SQ work pending, queue the transport on the
 * DTO list if needed, and let the tasklet do the actual reaping.
 * The set_bit must precede the list add / tasklet_schedule so the
 * tasklet is guaranteed to see the pending work.
 */
static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
{
	struct svcxprt_rdma *xprt = cq_context;
	unsigned long flags;

	/*
	 * Set the bit regardless of whether or not it's on the list
	 * because it may be on the list already due to an RQ
	 * completion.
	 */
	set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);

	/*
	 * If this transport is not already on the DTO transport queue,
	 * add it
	 */
	spin_lock_irqsave(&dto_lock, flags);
	if (list_empty(&xprt->sc_dto_q))
		list_add_tail(&xprt->sc_dto_q, &dto_xprt_q);
	spin_unlock_irqrestore(&dto_lock, flags);

	/* Tasklet does all the work to avoid irqsave locks. */
	tasklet_schedule(&dto_tasklet);
}
| 396 | |||
| 397 | static void create_context_cache(struct svcxprt_rdma *xprt, | ||
| 398 | int ctxt_count, int ctxt_bump, int ctxt_max) | ||
| 399 | { | ||
| 400 | struct svc_rdma_op_ctxt *ctxt; | ||
| 401 | int i; | ||
| 402 | |||
| 403 | xprt->sc_ctxt_max = ctxt_max; | ||
| 404 | xprt->sc_ctxt_bump = ctxt_bump; | ||
| 405 | xprt->sc_ctxt_cnt = 0; | ||
| 406 | xprt->sc_ctxt_head = NULL; | ||
| 407 | for (i = 0; i < ctxt_count; i++) { | ||
| 408 | ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL); | ||
| 409 | if (ctxt) { | ||
| 410 | ctxt->next = xprt->sc_ctxt_head; | ||
| 411 | xprt->sc_ctxt_head = ctxt; | ||
| 412 | xprt->sc_ctxt_cnt++; | ||
| 413 | } | ||
| 414 | } | ||
| 415 | } | ||
| 416 | |||
| 417 | static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt) | ||
| 418 | { | ||
| 419 | struct svc_rdma_op_ctxt *next; | ||
| 420 | if (!ctxt) | ||
| 421 | return; | ||
| 422 | |||
| 423 | do { | ||
| 424 | next = ctxt->next; | ||
| 425 | kfree(ctxt); | ||
| 426 | ctxt = next; | ||
| 427 | } while (next); | ||
| 428 | } | ||
| 429 | |||
| 430 | static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, | ||
| 431 | int listener) | ||
| 432 | { | ||
| 433 | struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL); | ||
| 434 | |||
| 435 | if (!cma_xprt) | ||
| 436 | return NULL; | ||
| 437 | svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv); | ||
| 438 | INIT_LIST_HEAD(&cma_xprt->sc_accept_q); | ||
| 439 | INIT_LIST_HEAD(&cma_xprt->sc_dto_q); | ||
| 440 | INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); | ||
| 441 | INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); | ||
| 442 | init_waitqueue_head(&cma_xprt->sc_send_wait); | ||
| 443 | |||
| 444 | spin_lock_init(&cma_xprt->sc_lock); | ||
| 445 | spin_lock_init(&cma_xprt->sc_read_complete_lock); | ||
| 446 | spin_lock_init(&cma_xprt->sc_ctxt_lock); | ||
| 447 | spin_lock_init(&cma_xprt->sc_rq_dto_lock); | ||
| 448 | |||
| 449 | cma_xprt->sc_ord = svcrdma_ord; | ||
| 450 | |||
| 451 | cma_xprt->sc_max_req_size = svcrdma_max_req_size; | ||
| 452 | cma_xprt->sc_max_requests = svcrdma_max_requests; | ||
| 453 | cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT; | ||
| 454 | atomic_set(&cma_xprt->sc_sq_count, 0); | ||
| 455 | |||
| 456 | if (!listener) { | ||
| 457 | int reqs = cma_xprt->sc_max_requests; | ||
| 458 | create_context_cache(cma_xprt, | ||
| 459 | reqs << 1, /* starting size */ | ||
| 460 | reqs, /* bump amount */ | ||
| 461 | reqs + | ||
| 462 | cma_xprt->sc_sq_depth + | ||
| 463 | RPCRDMA_MAX_THREADS + 1); /* max */ | ||
| 464 | if (!cma_xprt->sc_ctxt_head) { | ||
| 465 | kfree(cma_xprt); | ||
| 466 | return NULL; | ||
| 467 | } | ||
| 468 | clear_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); | ||
| 469 | } else | ||
| 470 | set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags); | ||
| 471 | |||
| 472 | return cma_xprt; | ||
| 473 | } | ||
| 474 | |||
| 475 | struct page *svc_rdma_get_page(void) | ||
| 476 | { | ||
| 477 | struct page *page; | ||
| 478 | |||
| 479 | while ((page = alloc_page(GFP_KERNEL)) == NULL) { | ||
| 480 | /* If we can't get memory, wait a bit and try again */ | ||
| 481 | printk(KERN_INFO "svcrdma: out of memory...retrying in 1000 " | ||
| 482 | "jiffies.\n"); | ||
| 483 | schedule_timeout_uninterruptible(msecs_to_jiffies(1000)); | ||
| 484 | } | ||
| 485 | return page; | ||
| 486 | } | ||
| 487 | |||
| 488 | int svc_rdma_post_recv(struct svcxprt_rdma *xprt) | ||
| 489 | { | ||
| 490 | struct ib_recv_wr recv_wr, *bad_recv_wr; | ||
| 491 | struct svc_rdma_op_ctxt *ctxt; | ||
| 492 | struct page *page; | ||
| 493 | unsigned long pa; | ||
| 494 | int sge_no; | ||
| 495 | int buflen; | ||
| 496 | int ret; | ||
| 497 | |||
| 498 | ctxt = svc_rdma_get_context(xprt); | ||
| 499 | buflen = 0; | ||
| 500 | ctxt->direction = DMA_FROM_DEVICE; | ||
| 501 | for (sge_no = 0; buflen < xprt->sc_max_req_size; sge_no++) { | ||
| 502 | BUG_ON(sge_no >= xprt->sc_max_sge); | ||
| 503 | page = svc_rdma_get_page(); | ||
| 504 | ctxt->pages[sge_no] = page; | ||
| 505 | pa = ib_dma_map_page(xprt->sc_cm_id->device, | ||
| 506 | page, 0, PAGE_SIZE, | ||
| 507 | DMA_FROM_DEVICE); | ||
| 508 | ctxt->sge[sge_no].addr = pa; | ||
| 509 | ctxt->sge[sge_no].length = PAGE_SIZE; | ||
| 510 | ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; | ||
| 511 | buflen += PAGE_SIZE; | ||
| 512 | } | ||
| 513 | ctxt->count = sge_no; | ||
| 514 | recv_wr.next = NULL; | ||
| 515 | recv_wr.sg_list = &ctxt->sge[0]; | ||
| 516 | recv_wr.num_sge = ctxt->count; | ||
| 517 | recv_wr.wr_id = (u64)(unsigned long)ctxt; | ||
| 518 | |||
| 519 | ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr); | ||
| 520 | return ret; | ||
| 521 | } | ||
| 522 | |||
| 523 | /* | ||
| 524 | * This function handles the CONNECT_REQUEST event on a listening | ||
| 525 | * endpoint. It is passed the cma_id for the _new_ connection. The context in | ||
| 526 | * this cma_id is inherited from the listening cma_id and is the svc_xprt | ||
| 527 | * structure for the listening endpoint. | ||
| 528 | * | ||
| 529 | * This function creates a new xprt for the new connection and enqueues it on | ||
| 530 | * the accept queue for the listent xprt. When the listen thread is kicked, it | ||
| 531 | * will call the recvfrom method on the listen xprt which will accept the new | ||
| 532 | * connection. | ||
| 533 | */ | ||
| 534 | static void handle_connect_req(struct rdma_cm_id *new_cma_id) | ||
| 535 | { | ||
| 536 | struct svcxprt_rdma *listen_xprt = new_cma_id->context; | ||
| 537 | struct svcxprt_rdma *newxprt; | ||
| 538 | |||
| 539 | /* Create a new transport */ | ||
| 540 | newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0); | ||
| 541 | if (!newxprt) { | ||
| 542 | dprintk("svcrdma: failed to create new transport\n"); | ||
| 543 | return; | ||
| 544 | } | ||
| 545 | newxprt->sc_cm_id = new_cma_id; | ||
| 546 | new_cma_id->context = newxprt; | ||
| 547 | dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n", | ||
| 548 | newxprt, newxprt->sc_cm_id, listen_xprt); | ||
| 549 | |||
| 550 | /* | ||
| 551 | * Enqueue the new transport on the accept queue of the listening | ||
| 552 | * transport | ||
| 553 | */ | ||
| 554 | spin_lock_bh(&listen_xprt->sc_lock); | ||
| 555 | list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q); | ||
| 556 | spin_unlock_bh(&listen_xprt->sc_lock); | ||
| 557 | |||
| 558 | /* | ||
| 559 | * Can't use svc_xprt_received here because we are not on a | ||
| 560 | * rqstp thread | ||
| 561 | */ | ||
| 562 | set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags); | ||
| 563 | svc_xprt_enqueue(&listen_xprt->sc_xprt); | ||
| 564 | } | ||
| 565 | |||
| 566 | /* | ||
| 567 | * Handles events generated on the listening endpoint. These events will be | ||
| 568 | * either be incoming connect requests or adapter removal events. | ||
| 569 | */ | ||
| 570 | static int rdma_listen_handler(struct rdma_cm_id *cma_id, | ||
| 571 | struct rdma_cm_event *event) | ||
| 572 | { | ||
| 573 | struct svcxprt_rdma *xprt = cma_id->context; | ||
| 574 | int ret = 0; | ||
| 575 | |||
| 576 | switch (event->event) { | ||
| 577 | case RDMA_CM_EVENT_CONNECT_REQUEST: | ||
| 578 | dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " | ||
| 579 | "event=%d\n", cma_id, cma_id->context, event->event); | ||
| 580 | handle_connect_req(cma_id); | ||
| 581 | break; | ||
| 582 | |||
| 583 | case RDMA_CM_EVENT_ESTABLISHED: | ||
| 584 | /* Accept complete */ | ||
| 585 | dprintk("svcrdma: Connection completed on LISTEN xprt=%p, " | ||
| 586 | "cm_id=%p\n", xprt, cma_id); | ||
| 587 | break; | ||
| 588 | |||
| 589 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | ||
| 590 | dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", | ||
| 591 | xprt, cma_id); | ||
| 592 | if (xprt) | ||
| 593 | set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); | ||
| 594 | break; | ||
| 595 | |||
| 596 | default: | ||
| 597 | dprintk("svcrdma: Unexpected event on listening endpoint %p, " | ||
| 598 | "event=%d\n", cma_id, event->event); | ||
| 599 | break; | ||
| 600 | } | ||
| 601 | |||
| 602 | return ret; | ||
| 603 | } | ||
| 604 | |||
/*
 * Event handler for a connected (non-listening) cm_id. On ESTABLISHED
 * the deferred-I/O gate is opened; on DISCONNECTED or DEVICE_REMOVAL
 * the transport is marked for close and the server is kicked so the
 * close is processed.
 */
static int rdma_cma_handler(struct rdma_cm_id *cma_id,
			    struct rdma_cm_event *event)
{
	struct svc_xprt *xprt = cma_id->context;
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	switch (event->event) {
	case RDMA_CM_EVENT_ESTABLISHED:
		/* Accept complete */
		dprintk("svcrdma: Connection completed on DTO xprt=%p, "
			"cm_id=%p\n", xprt, cma_id);
		/* Clearing CONN_PENDING lets dto_tasklet_func() start
		 * enqueuing received data for RPC processing. */
		clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
		svc_xprt_enqueue(xprt);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
		dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
			xprt, cma_id);
		/* NOTE(review): guards against a NULL context; it is not
		 * evident from this file when that can occur — verify. */
		if (xprt) {
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_enqueue(xprt);
		}
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
			"event=%d\n", cma_id, xprt, event->event);
		if (xprt) {
			set_bit(XPT_CLOSE, &xprt->xpt_flags);
			svc_xprt_enqueue(xprt);
		}
		break;
	default:
		dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
			"event=%d\n", cma_id, event->event);
		break;
	}
	return 0;
}
| 642 | |||
| 643 | /* | ||
| 644 | * Create a listening RDMA service endpoint. | ||
| 645 | */ | ||
| 646 | static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, | ||
| 647 | struct sockaddr *sa, int salen, | ||
| 648 | int flags) | ||
| 649 | { | ||
| 650 | struct rdma_cm_id *listen_id; | ||
| 651 | struct svcxprt_rdma *cma_xprt; | ||
| 652 | struct svc_xprt *xprt; | ||
| 653 | int ret; | ||
| 654 | |||
| 655 | dprintk("svcrdma: Creating RDMA socket\n"); | ||
| 656 | |||
| 657 | cma_xprt = rdma_create_xprt(serv, 1); | ||
| 658 | if (!cma_xprt) | ||
| 659 | return ERR_PTR(ENOMEM); | ||
| 660 | xprt = &cma_xprt->sc_xprt; | ||
| 661 | |||
| 662 | listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP); | ||
| 663 | if (IS_ERR(listen_id)) { | ||
| 664 | rdma_destroy_xprt(cma_xprt); | ||
| 665 | dprintk("svcrdma: rdma_create_id failed = %ld\n", | ||
| 666 | PTR_ERR(listen_id)); | ||
| 667 | return (void *)listen_id; | ||
| 668 | } | ||
| 669 | ret = rdma_bind_addr(listen_id, sa); | ||
| 670 | if (ret) { | ||
| 671 | rdma_destroy_xprt(cma_xprt); | ||
| 672 | rdma_destroy_id(listen_id); | ||
| 673 | dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); | ||
| 674 | return ERR_PTR(ret); | ||
| 675 | } | ||
| 676 | cma_xprt->sc_cm_id = listen_id; | ||
| 677 | |||
| 678 | ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); | ||
| 679 | if (ret) { | ||
| 680 | rdma_destroy_id(listen_id); | ||
| 681 | rdma_destroy_xprt(cma_xprt); | ||
| 682 | dprintk("svcrdma: rdma_listen failed = %d\n", ret); | ||
| 683 | } | ||
| 684 | |||
| 685 | /* | ||
| 686 | * We need to use the address from the cm_id in case the | ||
| 687 | * caller specified 0 for the port number. | ||
| 688 | */ | ||
| 689 | sa = (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr; | ||
| 690 | svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen); | ||
| 691 | |||
| 692 | return &cma_xprt->sc_xprt; | ||
| 693 | } | ||
| 694 | |||
| 695 | /* | ||
| 696 | * This is the xpo_recvfrom function for listening endpoints. Its | ||
| 697 | * purpose is to accept incoming connections. The CMA callback handler | ||
| 698 | * has already created a new transport and attached it to the new CMA | ||
| 699 | * ID. | ||
| 700 | * | ||
| 701 | * There is a queue of pending connections hung on the listening | ||
| 702 | * transport. This queue contains the new svc_xprt structure. This | ||
| 703 | * function takes svc_xprt structures off the accept_q and completes | ||
| 704 | * the connection. | ||
| 705 | */ | ||
| 706 | static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) | ||
| 707 | { | ||
| 708 | struct svcxprt_rdma *listen_rdma; | ||
| 709 | struct svcxprt_rdma *newxprt = NULL; | ||
| 710 | struct rdma_conn_param conn_param; | ||
| 711 | struct ib_qp_init_attr qp_attr; | ||
| 712 | struct ib_device_attr devattr; | ||
| 713 | struct sockaddr *sa; | ||
| 714 | int ret; | ||
| 715 | int i; | ||
| 716 | |||
| 717 | listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
| 718 | clear_bit(XPT_CONN, &xprt->xpt_flags); | ||
| 719 | /* Get the next entry off the accept list */ | ||
| 720 | spin_lock_bh(&listen_rdma->sc_lock); | ||
| 721 | if (!list_empty(&listen_rdma->sc_accept_q)) { | ||
| 722 | newxprt = list_entry(listen_rdma->sc_accept_q.next, | ||
| 723 | struct svcxprt_rdma, sc_accept_q); | ||
| 724 | list_del_init(&newxprt->sc_accept_q); | ||
| 725 | } | ||
| 726 | if (!list_empty(&listen_rdma->sc_accept_q)) | ||
| 727 | set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags); | ||
| 728 | spin_unlock_bh(&listen_rdma->sc_lock); | ||
| 729 | if (!newxprt) | ||
| 730 | return NULL; | ||
| 731 | |||
| 732 | dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", | ||
| 733 | newxprt, newxprt->sc_cm_id); | ||
| 734 | |||
| 735 | ret = ib_query_device(newxprt->sc_cm_id->device, &devattr); | ||
| 736 | if (ret) { | ||
| 737 | dprintk("svcrdma: could not query device attributes on " | ||
| 738 | "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret); | ||
| 739 | goto errout; | ||
| 740 | } | ||
| 741 | |||
| 742 | /* Qualify the transport resource defaults with the | ||
| 743 | * capabilities of this particular device */ | ||
| 744 | newxprt->sc_max_sge = min((size_t)devattr.max_sge, | ||
| 745 | (size_t)RPCSVC_MAXPAGES); | ||
| 746 | newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr, | ||
| 747 | (size_t)svcrdma_max_requests); | ||
| 748 | newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests; | ||
| 749 | |||
| 750 | newxprt->sc_ord = min((size_t)devattr.max_qp_rd_atom, | ||
| 751 | (size_t)svcrdma_ord); | ||
| 752 | |||
| 753 | newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device); | ||
| 754 | if (IS_ERR(newxprt->sc_pd)) { | ||
| 755 | dprintk("svcrdma: error creating PD for connect request\n"); | ||
| 756 | goto errout; | ||
| 757 | } | ||
| 758 | newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device, | ||
| 759 | sq_comp_handler, | ||
| 760 | cq_event_handler, | ||
| 761 | newxprt, | ||
| 762 | newxprt->sc_sq_depth, | ||
| 763 | 0); | ||
| 764 | if (IS_ERR(newxprt->sc_sq_cq)) { | ||
| 765 | dprintk("svcrdma: error creating SQ CQ for connect request\n"); | ||
| 766 | goto errout; | ||
| 767 | } | ||
| 768 | newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device, | ||
| 769 | rq_comp_handler, | ||
| 770 | cq_event_handler, | ||
| 771 | newxprt, | ||
| 772 | newxprt->sc_max_requests, | ||
| 773 | 0); | ||
| 774 | if (IS_ERR(newxprt->sc_rq_cq)) { | ||
| 775 | dprintk("svcrdma: error creating RQ CQ for connect request\n"); | ||
| 776 | goto errout; | ||
| 777 | } | ||
| 778 | |||
| 779 | memset(&qp_attr, 0, sizeof qp_attr); | ||
| 780 | qp_attr.event_handler = qp_event_handler; | ||
| 781 | qp_attr.qp_context = &newxprt->sc_xprt; | ||
| 782 | qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; | ||
| 783 | qp_attr.cap.max_recv_wr = newxprt->sc_max_requests; | ||
| 784 | qp_attr.cap.max_send_sge = newxprt->sc_max_sge; | ||
| 785 | qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; | ||
| 786 | qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; | ||
| 787 | qp_attr.qp_type = IB_QPT_RC; | ||
| 788 | qp_attr.send_cq = newxprt->sc_sq_cq; | ||
| 789 | qp_attr.recv_cq = newxprt->sc_rq_cq; | ||
| 790 | dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n" | ||
| 791 | " cm_id->device=%p, sc_pd->device=%p\n" | ||
| 792 | " cap.max_send_wr = %d\n" | ||
| 793 | " cap.max_recv_wr = %d\n" | ||
| 794 | " cap.max_send_sge = %d\n" | ||
| 795 | " cap.max_recv_sge = %d\n", | ||
| 796 | newxprt->sc_cm_id, newxprt->sc_pd, | ||
| 797 | newxprt->sc_cm_id->device, newxprt->sc_pd->device, | ||
| 798 | qp_attr.cap.max_send_wr, | ||
| 799 | qp_attr.cap.max_recv_wr, | ||
| 800 | qp_attr.cap.max_send_sge, | ||
| 801 | qp_attr.cap.max_recv_sge); | ||
| 802 | |||
| 803 | ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); | ||
| 804 | if (ret) { | ||
| 805 | /* | ||
| 806 | * XXX: This is a hack. We need a xx_request_qp interface | ||
| 807 | * that will adjust the qp_attr's with a best-effort | ||
| 808 | * number | ||
| 809 | */ | ||
| 810 | qp_attr.cap.max_send_sge -= 2; | ||
| 811 | qp_attr.cap.max_recv_sge -= 2; | ||
| 812 | ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, | ||
| 813 | &qp_attr); | ||
| 814 | if (ret) { | ||
| 815 | dprintk("svcrdma: failed to create QP, ret=%d\n", ret); | ||
| 816 | goto errout; | ||
| 817 | } | ||
| 818 | newxprt->sc_max_sge = qp_attr.cap.max_send_sge; | ||
| 819 | newxprt->sc_max_sge = qp_attr.cap.max_recv_sge; | ||
| 820 | newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; | ||
| 821 | newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; | ||
| 822 | } | ||
| 823 | newxprt->sc_qp = newxprt->sc_cm_id->qp; | ||
| 824 | |||
| 825 | /* Register all of physical memory */ | ||
| 826 | newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, | ||
| 827 | IB_ACCESS_LOCAL_WRITE | | ||
| 828 | IB_ACCESS_REMOTE_WRITE); | ||
| 829 | if (IS_ERR(newxprt->sc_phys_mr)) { | ||
| 830 | dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); | ||
| 831 | goto errout; | ||
| 832 | } | ||
| 833 | |||
| 834 | /* Post receive buffers */ | ||
| 835 | for (i = 0; i < newxprt->sc_max_requests; i++) { | ||
| 836 | ret = svc_rdma_post_recv(newxprt); | ||
| 837 | if (ret) { | ||
| 838 | dprintk("svcrdma: failure posting receive buffers\n"); | ||
| 839 | goto errout; | ||
| 840 | } | ||
| 841 | } | ||
| 842 | |||
| 843 | /* Swap out the handler */ | ||
| 844 | newxprt->sc_cm_id->event_handler = rdma_cma_handler; | ||
| 845 | |||
| 846 | /* Accept Connection */ | ||
| 847 | set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags); | ||
| 848 | memset(&conn_param, 0, sizeof conn_param); | ||
| 849 | conn_param.responder_resources = 0; | ||
| 850 | conn_param.initiator_depth = newxprt->sc_ord; | ||
| 851 | ret = rdma_accept(newxprt->sc_cm_id, &conn_param); | ||
| 852 | if (ret) { | ||
| 853 | dprintk("svcrdma: failed to accept new connection, ret=%d\n", | ||
| 854 | ret); | ||
| 855 | goto errout; | ||
| 856 | } | ||
| 857 | |||
| 858 | dprintk("svcrdma: new connection %p accepted with the following " | ||
| 859 | "attributes:\n" | ||
| 860 | " local_ip : %d.%d.%d.%d\n" | ||
| 861 | " local_port : %d\n" | ||
| 862 | " remote_ip : %d.%d.%d.%d\n" | ||
| 863 | " remote_port : %d\n" | ||
| 864 | " max_sge : %d\n" | ||
| 865 | " sq_depth : %d\n" | ||
| 866 | " max_requests : %d\n" | ||
| 867 | " ord : %d\n", | ||
| 868 | newxprt, | ||
| 869 | NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
| 870 | route.addr.src_addr)->sin_addr.s_addr), | ||
| 871 | ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
| 872 | route.addr.src_addr)->sin_port), | ||
| 873 | NIPQUAD(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
| 874 | route.addr.dst_addr)->sin_addr.s_addr), | ||
| 875 | ntohs(((struct sockaddr_in *)&newxprt->sc_cm_id-> | ||
| 876 | route.addr.dst_addr)->sin_port), | ||
| 877 | newxprt->sc_max_sge, | ||
| 878 | newxprt->sc_sq_depth, | ||
| 879 | newxprt->sc_max_requests, | ||
| 880 | newxprt->sc_ord); | ||
| 881 | |||
| 882 | /* Set the local and remote addresses in the transport */ | ||
| 883 | sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; | ||
| 884 | svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); | ||
| 885 | sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; | ||
| 886 | svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa)); | ||
| 887 | |||
| 888 | ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP); | ||
| 889 | ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP); | ||
| 890 | return &newxprt->sc_xprt; | ||
| 891 | |||
| 892 | errout: | ||
| 893 | dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); | ||
| 894 | rdma_destroy_id(newxprt->sc_cm_id); | ||
| 895 | rdma_destroy_xprt(newxprt); | ||
| 896 | return NULL; | ||
| 897 | } | ||
| 898 | |||
| 899 | /* | ||
| 900 | * Post an RQ WQE to the RQ when the rqst is being released. This | ||
| 901 | * effectively returns an RQ credit to the client. The rq_xprt_ctxt | ||
| 902 | * will be null if the request is deferred due to an RDMA_READ or the | ||
| 903 | * transport had no data ready (EAGAIN). Note that an RPC deferred in | ||
| 904 | * svc_process will still return the credit, this is because the data | ||
| 905 | * is copied and no longer consume a WQE/WC. | ||
| 906 | */ | ||
| 907 | static void svc_rdma_release_rqst(struct svc_rqst *rqstp) | ||
| 908 | { | ||
| 909 | int err; | ||
| 910 | struct svcxprt_rdma *rdma = | ||
| 911 | container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt); | ||
| 912 | if (rqstp->rq_xprt_ctxt) { | ||
| 913 | BUG_ON(rqstp->rq_xprt_ctxt != rdma); | ||
| 914 | err = svc_rdma_post_recv(rdma); | ||
| 915 | if (err) | ||
| 916 | dprintk("svcrdma: failed to post an RQ WQE error=%d\n", | ||
| 917 | err); | ||
| 918 | } | ||
| 919 | rqstp->rq_xprt_ctxt = NULL; | ||
| 920 | } | ||
| 921 | |||
/* Disable data ready events for this connection */
static void svc_rdma_detach(struct svc_xprt *xprt)
{
	struct svcxprt_rdma *rdma =
		container_of(xprt, struct svcxprt_rdma, sc_xprt);
	unsigned long flags;

	dprintk("svc: svc_rdma_detach(%p)\n", xprt);
	/*
	 * Shutdown the connection. This will ensure we don't get any
	 * more events from the provider.
	 */
	rdma_disconnect(rdma->sc_cm_id);
	rdma_destroy_id(rdma->sc_cm_id);

	/* We may already be on the DTO list; remove ourselves so the
	 * tasklet cannot touch a transport that is being torn down. */
	spin_lock_irqsave(&dto_lock, flags);
	if (!list_empty(&rdma->sc_dto_q))
		list_del_init(&rdma->sc_dto_q);
	spin_unlock_irqrestore(&dto_lock, flags);
}
| 943 | |||
| 944 | static void svc_rdma_free(struct svc_xprt *xprt) | ||
| 945 | { | ||
| 946 | struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; | ||
| 947 | dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); | ||
| 948 | rdma_destroy_xprt(rdma); | ||
| 949 | kfree(rdma); | ||
| 950 | } | ||
| 951 | |||
/*
 * Release the IB resources attached to a transport, tolerating a
 * partially-constructed xprt (each member may be NULL or an ERR_PTR
 * if svc_rdma_accept() failed midway). Destruction order matters:
 * the QP references the CQs, and the MR/CQs reference the PD, so the
 * PD must go last. Does NOT free the svcxprt_rdma itself.
 */
static void rdma_destroy_xprt(struct svcxprt_rdma *xprt)
{
	if (xprt->sc_qp && !IS_ERR(xprt->sc_qp))
		ib_destroy_qp(xprt->sc_qp);

	if (xprt->sc_sq_cq && !IS_ERR(xprt->sc_sq_cq))
		ib_destroy_cq(xprt->sc_sq_cq);

	if (xprt->sc_rq_cq && !IS_ERR(xprt->sc_rq_cq))
		ib_destroy_cq(xprt->sc_rq_cq);

	if (xprt->sc_phys_mr && !IS_ERR(xprt->sc_phys_mr))
		ib_dereg_mr(xprt->sc_phys_mr);

	if (xprt->sc_pd && !IS_ERR(xprt->sc_pd))
		ib_dealloc_pd(xprt->sc_pd);

	destroy_context_cache(xprt->sc_ctxt_head);
}
| 971 | |||
| 972 | static int svc_rdma_has_wspace(struct svc_xprt *xprt) | ||
| 973 | { | ||
| 974 | struct svcxprt_rdma *rdma = | ||
| 975 | container_of(xprt, struct svcxprt_rdma, sc_xprt); | ||
| 976 | |||
| 977 | /* | ||
| 978 | * If there are fewer SQ WR available than required to send a | ||
| 979 | * simple response, return false. | ||
| 980 | */ | ||
| 981 | if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3)) | ||
| 982 | return 0; | ||
| 983 | |||
| 984 | /* | ||
| 985 | * ...or there are already waiters on the SQ, | ||
| 986 | * return false. | ||
| 987 | */ | ||
| 988 | if (waitqueue_active(&rdma->sc_send_wait)) | ||
| 989 | return 0; | ||
| 990 | |||
| 991 | /* Otherwise return true. */ | ||
| 992 | return 1; | ||
| 993 | } | ||
| 994 | |||
/*
 * Post a send WR, accounting for SQ flow control. If the SQ is full,
 * try to reap completed WRs and then sleep until a credit frees up.
 * sc_lock makes the full-check and the post atomic with respect to
 * other senders. Returns 0 if the transport is closing, otherwise the
 * ib_post_send() result.
 */
int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
{
	struct ib_send_wr *bad_wr;
	int ret;

	if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
		return 0;

	/* Every WR must be signaled: sq_cq_reap() relies on a
	 * completion per WR to return the SQ credit. */
	BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
	BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
		wr->opcode);
	/* If the SQ is full, wait until an SQ entry is available */
	while (1) {
		spin_lock_bh(&xprt->sc_lock);
		if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
			/* Drop the lock before reaping/sleeping. */
			spin_unlock_bh(&xprt->sc_lock);
			atomic_inc(&rdma_stat_sq_starve);
			/* See if we can reap some SQ WR */
			sq_cq_reap(xprt);

			/* Wait until SQ WR available if SQ still full */
			wait_event(xprt->sc_send_wait,
				   atomic_read(&xprt->sc_sq_count) <
				   xprt->sc_sq_depth);
			/* Re-check under the lock on the next pass. */
			continue;
		}
		/* Bumped used SQ WR count and post */
		ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
		if (!ret)
			atomic_inc(&xprt->sc_sq_count);
		else
			dprintk("svcrdma: failed to post SQ WR rc=%d, "
				"sc_sq_count=%d, sc_sq_depth=%d\n",
				ret, atomic_read(&xprt->sc_sq_count),
				xprt->sc_sq_depth);
		spin_unlock_bh(&xprt->sc_lock);
		break;
	}
	return ret;
}
| 1035 | |||
| 1036 | int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, | ||
| 1037 | enum rpcrdma_errcode err) | ||
| 1038 | { | ||
| 1039 | struct ib_send_wr err_wr; | ||
| 1040 | struct ib_sge sge; | ||
| 1041 | struct page *p; | ||
| 1042 | struct svc_rdma_op_ctxt *ctxt; | ||
| 1043 | u32 *va; | ||
| 1044 | int length; | ||
| 1045 | int ret; | ||
| 1046 | |||
| 1047 | p = svc_rdma_get_page(); | ||
| 1048 | va = page_address(p); | ||
| 1049 | |||
| 1050 | /* XDR encode error */ | ||
| 1051 | length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); | ||
| 1052 | |||
| 1053 | /* Prepare SGE for local address */ | ||
| 1054 | sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, | ||
| 1055 | p, 0, PAGE_SIZE, DMA_FROM_DEVICE); | ||
| 1056 | sge.lkey = xprt->sc_phys_mr->lkey; | ||
| 1057 | sge.length = length; | ||
| 1058 | |||
| 1059 | ctxt = svc_rdma_get_context(xprt); | ||
| 1060 | ctxt->count = 1; | ||
| 1061 | ctxt->pages[0] = p; | ||
| 1062 | |||
| 1063 | /* Prepare SEND WR */ | ||
| 1064 | memset(&err_wr, 0, sizeof err_wr); | ||
| 1065 | ctxt->wr_op = IB_WR_SEND; | ||
| 1066 | err_wr.wr_id = (unsigned long)ctxt; | ||
| 1067 | err_wr.sg_list = &sge; | ||
| 1068 | err_wr.num_sge = 1; | ||
| 1069 | err_wr.opcode = IB_WR_SEND; | ||
| 1070 | err_wr.send_flags = IB_SEND_SIGNALED; | ||
| 1071 | |||
| 1072 | /* Post It */ | ||
| 1073 | ret = svc_rdma_send(xprt, &err_wr); | ||
| 1074 | if (ret) { | ||
| 1075 | dprintk("svcrdma: Error posting send = %d\n", ret); | ||
| 1076 | svc_rdma_put_context(ctxt, 1); | ||
| 1077 | } | ||
| 1078 | |||
| 1079 | return ret; | ||
| 1080 | } | ||
