Diffstat (limited to 'net/sunrpc')
26 files changed, 2845 insertions, 1606 deletions
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 46a2ce00a29b..cdcab9ca4c60 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -6,7 +6,7 @@
 obj-$(CONFIG_SUNRPC) += sunrpc.o
 obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
 
-sunrpc-y := clnt.o xprt.o sched.o \
+sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
	    auth.o auth_null.o auth_unix.o \
	    svc.o svcsock.o svcauth.o svcauth_unix.o \
	    pmap_clnt.o timer.o xdr.o \
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 505e2d4b3d62..a415d99c394d 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/errno.h>
-#include <linux/socket.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/spinlock.h>
 
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index fe1b874084bc..f3431a7e33da 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
 rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
-	gss_krb5_seqnum.o
+	gss_krb5_seqnum.o gss_krb5_wrap.o
 
 obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 2f7b867161d2..f44f46f1d8e0 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -42,9 +42,8 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/slab.h>
-#include <linux/socket.h>
-#include <linux/in.h>
 #include <linux/sched.h>
+#include <linux/pagemap.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/auth.h>
 #include <linux/sunrpc/auth_gss.h>
@@ -846,10 +845,8 @@ gss_marshal(struct rpc_task *task, u32 *p)
 
	/* We compute the checksum for the verifier over the xdr-encoded bytes
	 * starting with the xid and ending at the end of the credential: */
-	iov.iov_base = req->rq_snd_buf.head[0].iov_base;
-	if (task->tk_client->cl_xprt->stream)
-		/* See clnt.c:call_header() */
-		iov.iov_base += 4;
+	iov.iov_base = xprt_skip_transport_header(task->tk_xprt,
+					req->rq_snd_buf.head[0].iov_base);
	iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
	xdr_buf_from_iov(&iov, &verf_buf);
 
@@ -857,9 +854,7 @@ gss_marshal(struct rpc_task *task, u32 *p)
	*p++ = htonl(RPC_AUTH_GSS);
 
	mic.data = (u8 *)(p + 1);
-	maj_stat = gss_get_mic(ctx->gc_gss_ctx,
-			       GSS_C_QOP_DEFAULT,
-			       &verf_buf, &mic);
+	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
	if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
	} else if (maj_stat != 0) {
@@ -890,10 +885,8 @@ static u32 *
 gss_validate(struct rpc_task *task, u32 *p)
 {
	struct rpc_cred *cred = task->tk_msg.rpc_cred;
-	struct gss_cred	*gss_cred = container_of(cred, struct gss_cred,
-						gc_base);
	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
-	u32		seq, qop_state;
+	u32		seq;
	struct kvec	iov;
	struct xdr_buf	verf_buf;
	struct xdr_netobj mic;
@@ -914,23 +907,14 @@ gss_validate(struct rpc_task *task, u32 *p)
	mic.data = (u8 *)p;
	mic.len = len;
 
-	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
+	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
	if (maj_stat)
		goto out_bad;
-	switch (gss_cred->gc_service) {
-	case RPC_GSS_SVC_NONE:
-		/* verifier data, flavor, length: */
-		task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2;
-		break;
-	case RPC_GSS_SVC_INTEGRITY:
-		/* verifier data, flavor, length, length, sequence number: */
-		task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4;
-		break;
-	case RPC_GSS_SVC_PRIVACY:
-		goto out_bad;
-	}
+	/* We leave it to unwrap to calculate au_rslack. For now we just
+	 * calculate the length of the verifier: */
+	task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2;
	gss_put_ctx(ctx);
	dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n",
			task->tk_pid);
@@ -975,8 +959,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
	p = iov->iov_base + iov->iov_len;
	mic.data = (u8 *)(p + 1);
 
-	maj_stat = gss_get_mic(ctx->gc_gss_ctx,
-			GSS_C_QOP_DEFAULT, &integ_buf, &mic);
+	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
	status = -EIO; /* XXX? */
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
@@ -990,6 +973,113 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
	return 0;
 }
 
+static void
+priv_release_snd_buf(struct rpc_rqst *rqstp)
+{
+	int i;
+
+	for (i=0; i < rqstp->rq_enc_pages_num; i++)
+		__free_page(rqstp->rq_enc_pages[i]);
+	kfree(rqstp->rq_enc_pages);
+}
+
+static int
+alloc_enc_pages(struct rpc_rqst *rqstp)
+{
+	struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
+	int first, last, i;
+
+	if (snd_buf->page_len == 0) {
+		rqstp->rq_enc_pages_num = 0;
+		return 0;
+	}
+
+	first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
+	last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT;
+	rqstp->rq_enc_pages_num = last - first + 1 + 1;
+	rqstp->rq_enc_pages
+		= kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *),
+				GFP_NOFS);
+	if (!rqstp->rq_enc_pages)
+		goto out;
+	for (i=0; i < rqstp->rq_enc_pages_num; i++) {
+		rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS);
+		if (rqstp->rq_enc_pages[i] == NULL)
+			goto out_free;
+	}
+	rqstp->rq_release_snd_buf = priv_release_snd_buf;
+	return 0;
+out_free:
+	for (i--; i >= 0; i--) {
+		__free_page(rqstp->rq_enc_pages[i]);
+	}
+out:
+	return -EAGAIN;
+}
+
+static inline int
+gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+		kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
+{
+	struct xdr_buf	*snd_buf = &rqstp->rq_snd_buf;
+	u32		offset;
+	u32		maj_stat;
+	int		status;
+	u32		*opaque_len;
+	struct page	**inpages;
+	int		first;
+	int		pad;
+	struct kvec	*iov;
+	char		*tmp;
+
+	opaque_len = p++;
+	offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
+	*p++ = htonl(rqstp->rq_seqno);
+
+	status = encode(rqstp, p, obj);
+	if (status)
+		return status;
+
+	status = alloc_enc_pages(rqstp);
+	if (status)
+		return status;
+	first = snd_buf->page_base >> PAGE_CACHE_SHIFT;
+	inpages = snd_buf->pages + first;
+	snd_buf->pages = rqstp->rq_enc_pages;
+	snd_buf->page_base -= first << PAGE_CACHE_SHIFT;
+	/* Give the tail its own page, in case we need extra space in the
+	 * head when wrapping: */
+	if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+		tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
+		memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
+		snd_buf->tail[0].iov_base = tmp;
+	}
+	maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
+	/* RPC_SLACK_SPACE should prevent this ever happening: */
+	BUG_ON(snd_buf->len > snd_buf->buflen);
+	status = -EIO;
+	/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
+	 * done anyway, so it's safe to put the request on the wire: */
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	else if (maj_stat)
+		return status;
+
+	*opaque_len = htonl(snd_buf->len - offset);
+	/* guess whether we're in the head or the tail: */
+	if (snd_buf->page_len || snd_buf->tail[0].iov_len)
+		iov = snd_buf->tail;
+	else
+		iov = snd_buf->head;
+	p = iov->iov_base + iov->iov_len;
+	pad = 3 - ((snd_buf->len - offset - 1) & 3);
+	memset(p, 0, pad);
+	iov->iov_len += pad;
+	snd_buf->len += pad;
+
+	return 0;
+}
+
 static int
 gss_wrap_req(struct rpc_task *task,
	     kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
@@ -1017,6 +1107,8 @@ gss_wrap_req(struct rpc_task *task,
				rqstp, p, obj);
		break;
	case RPC_GSS_SVC_PRIVACY:
+		status = gss_wrap_req_priv(cred, ctx, encode,
+				rqstp, p, obj);
		break;
	}
 out:
@@ -1054,8 +1146,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
	if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
		return status;
 
-	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
-			&mic, NULL);
+	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
	if (maj_stat != GSS_S_COMPLETE)
@@ -1063,6 +1154,35 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
	return 0;
 }
 
+static inline int
+gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+		struct rpc_rqst *rqstp, u32 **p)
+{
+	struct xdr_buf	*rcv_buf = &rqstp->rq_rcv_buf;
+	u32 offset;
+	u32 opaque_len;
+	u32 maj_stat;
+	int status = -EIO;
+
+	opaque_len = ntohl(*(*p)++);
+	offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+	if (offset + opaque_len > rcv_buf->len)
+		return status;
+	/* remove padding: */
+	rcv_buf->len = offset + opaque_len;
+
+	maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
+	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+	if (maj_stat != GSS_S_COMPLETE)
+		return status;
+	if (ntohl(*(*p)++) != rqstp->rq_seqno)
+		return status;
+
+	return 0;
+}
+
+
 static int
 gss_unwrap_resp(struct rpc_task *task,
		kxdrproc_t decode, void *rqstp, u32 *p, void *obj)
@@ -1071,6 +1191,9 @@ gss_unwrap_resp(struct rpc_task *task,
	struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
			gc_base);
	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+	u32		*savedp = p;
+	struct kvec	*head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
+	int		savedlen = head->iov_len;
	int             status = -EIO;
 
	if (ctx->gc_proc != RPC_GSS_PROC_DATA)
@@ -1084,8 +1207,14 @@ gss_unwrap_resp(struct rpc_task *task,
			goto out;
		break;
	case RPC_GSS_SVC_PRIVACY:
+		status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
+		if (status)
+			goto out;
		break;
	}
+	/* take into account extra slack for integrity and privacy cases: */
+	task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp)
+						+ (savedlen - head->iov_len);
 out_decode:
	status = decode(rqstp, p, obj);
 out:
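
A note on the privacy path added above: gss_wrap_req_priv() emits the wrapped request body as a single XDR opaque, that is, a four-byte length word, the gss_wrap() output, and then zero bytes up to the next four-byte boundary, which is where the pad = 3 - ((snd_buf->len - offset - 1) & 3) expression comes from. A minimal user-space sketch of just that arithmetic, with made-up opaque lengths (not kernel code):

#include <stdio.h>

/* Same arithmetic as gss_wrap_req_priv(): bytes of zero padding
 * needed to bring an opaque of `len` bytes to a 4-byte boundary.
 * The expression always yields 0..3. */
static int xdr_pad(int len)
{
        return 3 - ((len - 1) & 3);
}

int main(void)
{
        int lens[] = { 1, 4, 5, 8, 13 };   /* hypothetical lengths */

        for (int i = 0; i < 5; i++)
                printf("len %2d -> pad %d (total %d)\n",
                       lens[i], xdr_pad(lens[i]),
                       lens[i] + xdr_pad(lens[i]));
        return 0;
}

Each padded total comes out a multiple of 4, so the receiver's XDR decoder stays aligned regardless of how long the ciphertext is.
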
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index ee6ae74cd1b2..3f3d5437f02d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -139,17 +139,91 @@ buf_to_sg(struct scatterlist *sg, char *ptr, int len) {
	sg->length = len;
 }
 
+static int
+process_xdr_buf(struct xdr_buf *buf, int offset, int len,
+		int (*actor)(struct scatterlist *, void *), void *data)
+{
+	int i, page_len, thislen, page_offset, ret = 0;
+	struct scatterlist	sg[1];
+
+	if (offset >= buf->head[0].iov_len) {
+		offset -= buf->head[0].iov_len;
+	} else {
+		thislen = buf->head[0].iov_len - offset;
+		if (thislen > len)
+			thislen = len;
+		buf_to_sg(sg, buf->head[0].iov_base + offset, thislen);
+		ret = actor(sg, data);
+		if (ret)
+			goto out;
+		offset = 0;
+		len -= thislen;
+	}
+	if (len == 0)
+		goto out;
+
+	if (offset >= buf->page_len) {
+		offset -= buf->page_len;
+	} else {
+		page_len = buf->page_len - offset;
+		if (page_len > len)
+			page_len = len;
+		len -= page_len;
+		page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
+		i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
+		thislen = PAGE_CACHE_SIZE - page_offset;
+		do {
+			if (thislen > page_len)
+				thislen = page_len;
+			sg->page = buf->pages[i];
+			sg->offset = page_offset;
+			sg->length = thislen;
+			ret = actor(sg, data);
+			if (ret)
+				goto out;
+			page_len -= thislen;
+			i++;
+			page_offset = 0;
+			thislen = PAGE_CACHE_SIZE;
+		} while (page_len != 0);
+		offset = 0;
+	}
+	if (len == 0)
+		goto out;
+
+	if (offset < buf->tail[0].iov_len) {
+		thislen = buf->tail[0].iov_len - offset;
+		if (thislen > len)
+			thislen = len;
+		buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen);
+		ret = actor(sg, data);
+		len -= thislen;
+	}
+	if (len != 0)
+		ret = -EINVAL;
+out:
+	return ret;
+}
+
+static int
+checksummer(struct scatterlist *sg, void *data)
+{
+	struct crypto_tfm *tfm = (struct crypto_tfm *)data;
+
+	crypto_digest_update(tfm, sg, 1);
+
+	return 0;
+}
+
 /* checksum the plaintext data and hdrlen bytes of the token header */
 s32
 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
-		struct xdr_netobj *cksum)
+		int body_offset, struct xdr_netobj *cksum)
 {
	char                            *cksumname;
	struct crypto_tfm               *tfm = NULL; /* XXX add to ctx? */
	struct scatterlist              sg[1];
	u32                             code = GSS_S_FAILURE;
-	int				len, thislen, offset;
-	int				i;
 
	switch (cksumtype) {
	case CKSUMTYPE_RSA_MD5:
@@ -169,33 +243,8 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
	crypto_digest_init(tfm);
	buf_to_sg(sg, header, hdrlen);
	crypto_digest_update(tfm, sg, 1);
-	if (body->head[0].iov_len) {
-		buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len);
-		crypto_digest_update(tfm, sg, 1);
-	}
-
-	len = body->page_len;
-	if (len != 0) {
-		offset = body->page_base & (PAGE_CACHE_SIZE - 1);
-		i = body->page_base >> PAGE_CACHE_SHIFT;
-		thislen = PAGE_CACHE_SIZE - offset;
-		do {
-			if (thislen > len)
-				thislen = len;
-			sg->page = body->pages[i];
-			sg->offset = offset;
-			sg->length = thislen;
-			crypto_digest_update(tfm, sg, 1);
-			len -= thislen;
-			i++;
-			offset = 0;
-			thislen = PAGE_CACHE_SIZE;
-		} while(len != 0);
-	}
-	if (body->tail[0].iov_len) {
-		buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len);
-		crypto_digest_update(tfm, sg, 1);
-	}
+	process_xdr_buf(body, body_offset, body->len - body_offset,
+			checksummer, tfm);
	crypto_digest_final(tfm, cksum->data);
	code = 0;
 out:
@@ -204,3 +253,154 @@ out:
 }
 
 EXPORT_SYMBOL(make_checksum);
+
+struct encryptor_desc {
+	u8 iv[8]; /* XXX hard-coded blocksize */
+	struct crypto_tfm *tfm;
+	int pos;
+	struct xdr_buf *outbuf;
+	struct page **pages;
+	struct scatterlist infrags[4];
+	struct scatterlist outfrags[4];
+	int fragno;
+	int fraglen;
+};
+
+static int
+encryptor(struct scatterlist *sg, void *data)
+{
+	struct encryptor_desc *desc = data;
+	struct xdr_buf *outbuf = desc->outbuf;
+	struct page *in_page;
+	int thislen = desc->fraglen + sg->length;
+	int fraglen, ret;
+	int page_pos;
+
+	/* Worst case is 4 fragments: head, end of page 1, start
+	 * of page 2, tail.  Anything more is a bug. */
+	BUG_ON(desc->fragno > 3);
+	desc->infrags[desc->fragno] = *sg;
+	desc->outfrags[desc->fragno] = *sg;
+
+	page_pos = desc->pos - outbuf->head[0].iov_len;
+	if (page_pos >= 0 && page_pos < outbuf->page_len) {
+		/* pages are not in place: */
+		int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT;
+		in_page = desc->pages[i];
+	} else {
+		in_page = sg->page;
+	}
+	desc->infrags[desc->fragno].page = in_page;
+	desc->fragno++;
+	desc->fraglen += sg->length;
+	desc->pos += sg->length;
+
+	fraglen = thislen & 7; /* XXX hardcoded blocksize */
+	thislen -= fraglen;
+
+	if (thislen == 0)
+		return 0;
+
+	ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags,
+					thislen, desc->iv);
+	if (ret)
+		return ret;
+	if (fraglen) {
+		desc->outfrags[0].page = sg->page;
+		desc->outfrags[0].offset = sg->offset + sg->length - fraglen;
+		desc->outfrags[0].length = fraglen;
+		desc->infrags[0] = desc->outfrags[0];
+		desc->infrags[0].page = in_page;
+		desc->fragno = 1;
+		desc->fraglen = fraglen;
+	} else {
+		desc->fragno = 0;
+		desc->fraglen = 0;
+	}
+	return 0;
+}
+
+int
+gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset,
+		struct page **pages)
+{
+	int ret;
+	struct encryptor_desc desc;
+
+	BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+
+	memset(desc.iv, 0, sizeof(desc.iv));
+	desc.tfm = tfm;
+	desc.pos = offset;
+	desc.outbuf = buf;
+	desc.pages = pages;
+	desc.fragno = 0;
+	desc.fraglen = 0;
+
+	ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc);
+	return ret;
+}
+
+EXPORT_SYMBOL(gss_encrypt_xdr_buf);
+
+struct decryptor_desc {
+	u8 iv[8]; /* XXX hard-coded blocksize */
+	struct crypto_tfm *tfm;
+	struct scatterlist frags[4];
+	int fragno;
+	int fraglen;
+};
+
+static int
+decryptor(struct scatterlist *sg, void *data)
+{
+	struct decryptor_desc *desc = data;
+	int thislen = desc->fraglen + sg->length;
+	int fraglen, ret;
+
+	/* Worst case is 4 fragments: head, end of page 1, start
+	 * of page 2, tail.  Anything more is a bug. */
+	BUG_ON(desc->fragno > 3);
+	desc->frags[desc->fragno] = *sg;
+	desc->fragno++;
+	desc->fraglen += sg->length;
+
+	fraglen = thislen & 7; /* XXX hardcoded blocksize */
+	thislen -= fraglen;
+
+	if (thislen == 0)
+		return 0;
+
+	ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags,
+				       thislen, desc->iv);
+	if (ret)
+		return ret;
+	if (fraglen) {
+		desc->frags[0].page = sg->page;
+		desc->frags[0].offset = sg->offset + sg->length - fraglen;
+		desc->frags[0].length = fraglen;
+		desc->fragno = 1;
+		desc->fraglen = fraglen;
+	} else {
+		desc->fragno = 0;
+		desc->fraglen = 0;
+	}
+	return 0;
+}
+
+int
+gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset)
+{
+	struct decryptor_desc desc;
+
+	/* XXXJBF: */
+	BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0);
+
+	memset(desc.iv, 0, sizeof(desc.iv));
+	desc.tfm = tfm;
+	desc.fragno = 0;
+	desc.fraglen = 0;
+	return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
+}
+
+EXPORT_SYMBOL(gss_decrypt_xdr_buf);
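
The encryptor()/decryptor() actors above feed only whole cipher blocks to crypto_cipher_encrypt_iv()/crypto_cipher_decrypt_iv() and carry any sub-block remainder (thislen & 7) into the next scatterlist fragment that process_xdr_buf() hands them. A standalone sketch of just that bookkeeping, with invented fragment sizes and no real crypto:

#include <stdio.h>

#define BLOCKSIZE 8   /* DES block size, as hard-coded in the patch */

/* Mimics the fraglen/thislen logic in encryptor(): accumulate fragment
 * bytes, "encrypt" only whole blocks, carry the remainder forward. */
int main(void)
{
        int frags[] = { 20, 3, 9, 12, 4 };   /* arbitrary test lengths,
                                              * summing to a block multiple */
        int carried = 0;

        for (int i = 0; i < 5; i++) {
                int thislen = carried + frags[i];
                int fraglen = thislen & (BLOCKSIZE - 1);   /* thislen & 7 */

                thislen -= fraglen;
                if (thislen)
                        printf("frag %d: encrypt %2d bytes, carry %d\n",
                               i, thislen, fraglen);
                else
                        printf("frag %d: nothing to encrypt yet, carry %d\n",
                               i, fraglen);
                carried = fraglen;
        }
        return 0;
}

The BUG_ON() in gss_encrypt_xdr_buf() guarantees the total is a block multiple, so by the last fragment the carry always drains to zero.
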
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 606a8a82cafb..5f1f806a0b11 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -39,7 +39,6 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/sunrpc/auth.h>
-#include <linux/in.h>
 #include <linux/sunrpc/gss_krb5.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/crypto.h>
@@ -191,43 +190,12 @@ gss_delete_sec_context_kerberos(void *internal_ctx) {
	kfree(kctx);
 }
 
-static u32
-gss_verify_mic_kerberos(struct gss_ctx *ctx,
-			struct xdr_buf *message,
-			struct xdr_netobj *mic_token,
-			u32 *qstate) {
-	u32 maj_stat = 0;
-	int qop_state;
-	struct krb5_ctx *kctx = ctx->internal_ctx_id;
-
-	maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state,
-				   KG_TOK_MIC_MSG);
-	if (!maj_stat && qop_state)
-		*qstate = qop_state;
-
-	dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat);
-	return maj_stat;
-}
-
-static u32
-gss_get_mic_kerberos(struct gss_ctx *ctx,
-		u32 qop,
-		struct xdr_buf *message,
-		struct xdr_netobj *mic_token) {
-	u32 err = 0;
-	struct krb5_ctx *kctx = ctx->internal_ctx_id;
-
-	err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG);
-
-	dprintk("RPC: gss_get_mic_kerberos returning %d\n",err);
-
-	return err;
-}
-
 static struct gss_api_ops gss_kerberos_ops = {
	.gss_import_sec_context	= gss_import_sec_context_kerberos,
	.gss_get_mic		= gss_get_mic_kerberos,
	.gss_verify_mic		= gss_verify_mic_kerberos,
+	.gss_wrap		= gss_wrap_kerberos,
+	.gss_unwrap		= gss_unwrap_kerberos,
	.gss_delete_sec_context	= gss_delete_sec_context_kerberos,
 };
 
@@ -242,6 +210,11 @@ static struct pf_desc gss_kerberos_pfs[] = {
		.service = RPC_GSS_SVC_INTEGRITY,
		.name = "krb5i",
	},
+	[2] = {
+		.pseudoflavor = RPC_AUTH_GSS_KRB5P,
+		.service = RPC_GSS_SVC_PRIVACY,
+		.name = "krb5p",
+	},
 };
 
 static struct gss_api_mech gss_kerberos_mech = {
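
The new [2] entry registers a third pseudoflavor so the privacy service can be requested by name ("krb5p") alongside "krb5" and "krb5i". A toy lookup over a table shaped like gss_kerberos_pfs; the numeric values shown are the conventional RPC pseudoflavor numbers, and the struct here is a simplified stand-in, not the kernel's pf_desc:

#include <stdio.h>
#include <string.h>

struct pf { int pseudoflavor; const char *name; };

static const struct pf pfs[] = {
        { 390003, "krb5"  },
        { 390004, "krb5i" },
        { 390005, "krb5p" },   /* the entry this patch adds */
};

static int name_to_flavor(const char *name)
{
        for (unsigned i = 0; i < sizeof(pfs) / sizeof(pfs[0]); i++)
                if (strcmp(pfs[i].name, name) == 0)
                        return pfs[i].pseudoflavor;
        return -1;
}

int main(void)
{
        printf("krb5p -> %d\n", name_to_flavor("krb5p"));
        return 0;
}
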
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index afeeb8715a77..13f8ae979454 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,22 +70,13 @@
 # define RPCDBG_FACILITY        RPCDBG_AUTH
 #endif
 
-static inline int
-gss_krb5_padding(int blocksize, int length) {
-	/* Most of the code is block-size independent but in practice we
-	 * use only 8: */
-	BUG_ON(blocksize != 8);
-	return 8 - (length & 7);
-}
-
 u32
-krb5_make_token(struct krb5_ctx *ctx, int qop_req,
-		   struct xdr_buf *text, struct xdr_netobj *token,
-		   int toktype)
+gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
+		struct xdr_netobj *token)
 {
+	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
	s32			checksum_type;
	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
-	int			blocksize = 0, tmsglen;
	unsigned char		*ptr, *krb5_hdr, *msg_start;
	s32			now;
 
@@ -93,9 +84,6 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req,
 
	now = get_seconds();
 
-	if (qop_req != 0)
-		goto out_err;
-
	switch (ctx->signalg) {
	case SGN_ALG_DES_MAC_MD5:
		checksum_type = CKSUMTYPE_RSA_MD5;
@@ -111,21 +99,13 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req,
		goto out_err;
	}
 
-	if (toktype == KG_TOK_WRAP_MSG) {
-		blocksize = crypto_tfm_alg_blocksize(ctx->enc);
-		tmsglen = blocksize + text->len
-			+ gss_krb5_padding(blocksize, blocksize + text->len);
-	} else {
-		tmsglen = 0;
-	}
-
-	token->len = g_token_size(&ctx->mech_used, 22 + tmsglen);
+	token->len = g_token_size(&ctx->mech_used, 22);
 
	ptr = token->data;
-	g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr);
+	g_make_token_header(&ctx->mech_used, 22, &ptr);
 
-	*ptr++ = (unsigned char) ((toktype>>8)&0xff);
-	*ptr++ = (unsigned char) (toktype&0xff);
+	*ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff);
+	*ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff);
 
	/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
	krb5_hdr = ptr - 2;
@@ -133,17 +113,9 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req,
 
	*(u16 *)(krb5_hdr + 2) = htons(ctx->signalg);
	memset(krb5_hdr + 4, 0xff, 4);
-	if (toktype == KG_TOK_WRAP_MSG)
-		*(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg);
 
-	if (toktype == KG_TOK_WRAP_MSG) {
-		/* XXX removing support for now */
-		goto out_err;
-	} else { /* Sign only.  */
-		if (make_checksum(checksum_type, krb5_hdr, 8, text,
-				       &md5cksum))
+	if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
		goto out_err;
-	}
 
	switch (ctx->signalg) {
	case SGN_ALG_DES_MAC_MD5:
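
For orientation, the 8-byte token header that gss_get_mic_kerberos() builds follows RFC 1964, section 1.2.1: two TOK_ID bytes, a 16-bit sign algorithm, and four filler/seal-alg bytes, with the sequence number and encrypted checksum following at krb5_hdr + 8 and krb5_hdr + 16. A small user-space sketch that lays the header out the same way; the constant values are taken from RFC 1964, and the kernel's own #defines are assumed to match:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>   /* htons */

#define KG_TOK_MIC_MSG      0x0101   /* MIC token TOK_ID, per RFC 1964 */
#define SGN_ALG_DES_MAC_MD5 0x0000

int main(void)
{
        unsigned char hdr[8];
        unsigned short signalg = htons(SGN_ALG_DES_MAC_MD5);

        hdr[0] = (KG_TOK_MIC_MSG >> 8) & 0xff;   /* TOK_ID, big-endian */
        hdr[1] = KG_TOK_MIC_MSG & 0xff;
        memcpy(hdr + 2, &signalg, 2);   /* the kernel writes this as a
                                         * raw u16 store at krb5_hdr + 2 */
        memset(hdr + 4, 0xff, 4);       /* seal alg / filler bytes */

        for (int i = 0; i < 8; i++)
                printf("%02x ", hdr[i]);
        printf("\n");   /* prints: 01 01 00 00 ff ff ff ff */
        return 0;
}
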
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 8767fc53183d..2030475d98ed 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -68,21 +68,14 @@
 #endif
 
 
-/* message_buffer is an input if toktype is MIC and an output if it is WRAP:
- * If toktype is MIC: read_token is a mic token, and message_buffer is the
- * data that the mic was supposedly taken over.
- * If toktype is WRAP: read_token is a wrap token, and message_buffer is used
- * to return the decrypted data.
- */
+/* read_token is a mic token, and message_buffer is the data that the mic was
+ * supposedly taken over. */
 
-/* XXX will need to change prototype and/or just split into a separate function
- * when we add privacy (because read_token will be in pages too). */
 u32
-krb5_read_token(struct krb5_ctx *ctx,
-		struct xdr_netobj *read_token,
-		struct xdr_buf *message_buffer,
-		int *qop_state, int toktype)
+gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
+		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
 {
+	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
	int			signalg;
	int			sealalg;
	s32			checksum_type;
@@ -100,16 +93,12 @@ krb5_read_token(struct krb5_ctx *ctx,
				read_token->len))
		goto out;
 
-	if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff)))
+	if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
+	    (*ptr++ != ( KG_TOK_MIC_MSG    &0xff))   )
		goto out;
 
	/* XXX sanity-check bodysize?? */
 
-	if (toktype == KG_TOK_WRAP_MSG) {
-		/* XXX gone */
-		goto out;
-	}
-
	/* get the sign and seal algorithms */
 
	signalg = ptr[0] + (ptr[1] << 8);
@@ -120,14 +109,7 @@ krb5_read_token(struct krb5_ctx *ctx,
	if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
		goto out;
 
-	if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) ||
-	    ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff)))
-		goto out;
-
-	/* in the current spec, there is only one valid seal algorithm per
-	   key type, so a simple comparison is ok */
-
-	if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg))
+	if (sealalg != 0xffff)
		goto out;
 
	/* there are several mappings of seal algorithms to sign algorithms,
@@ -154,7 +136,7 @@ krb5_read_token(struct krb5_ctx *ctx,
	switch (signalg) {
	case SGN_ALG_DES_MAC_MD5:
		ret = make_checksum(checksum_type, ptr - 2, 8,
-				    message_buffer, &md5cksum);
+				    message_buffer, 0, &md5cksum);
		if (ret)
			goto out;
 
@@ -175,9 +157,6 @@ krb5_read_token(struct krb5_ctx *ctx,
 
	/* it got through unscathed.  Make sure the context is unexpired */
 
-	if (qop_state)
-		*qop_state = GSS_C_QOP_DEFAULT;
-
	now = get_seconds();
 
	ret = GSS_S_CONTEXT_EXPIRED;
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
new file mode 100644
index 000000000000..af777cf9f251
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -0,0 +1,363 @@
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/sunrpc/gss_krb5.h>
+#include <linux/random.h>
+#include <linux/pagemap.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY	RPCDBG_AUTH
+#endif
+
+static inline int
+gss_krb5_padding(int blocksize, int length)
+{
+	/* Most of the code is block-size independent but currently we
+	 * use only 8: */
+	BUG_ON(blocksize != 8);
+	return 8 - (length & 7);
+}
+
+static inline void
+gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
+{
+	int padding = gss_krb5_padding(blocksize, buf->len - offset);
+	char *p;
+	struct kvec *iov;
+
+	if (buf->page_len || buf->tail[0].iov_len)
+		iov = &buf->tail[0];
+	else
+		iov = &buf->head[0];
+	p = iov->iov_base + iov->iov_len;
+	iov->iov_len += padding;
+	buf->len += padding;
+	memset(p, padding, padding);
+}
+
+static inline int
+gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
+{
+	u8 *ptr;
+	u8 pad;
+	int len = buf->len;
+
+	if (len <= buf->head[0].iov_len) {
+		pad = *(u8 *)(buf->head[0].iov_base + len - 1);
+		if (pad > buf->head[0].iov_len)
+			return -EINVAL;
+		buf->head[0].iov_len -= pad;
+		goto out;
+	} else
+		len -= buf->head[0].iov_len;
+	if (len <= buf->page_len) {
+		int last = (buf->page_base + len - 1)
+					>>PAGE_CACHE_SHIFT;
+		int offset = (buf->page_base + len - 1)
+					& (PAGE_CACHE_SIZE - 1);
+		ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA);
+		pad = *(ptr + offset);
+		kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA);
+		goto out;
+	} else
+		len -= buf->page_len;
+	BUG_ON(len > buf->tail[0].iov_len);
+	pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
+out:
+	/* XXX: NOTE: we do not adjust the page lengths--they represent
+	 * a range of data in the real filesystem page cache, and we need
+	 * to know that range so the xdr code can properly place read data.
+	 * However adjusting the head length, as we do above, is harmless.
+	 * In the case of a request that fits into a single page, the server
+	 * also uses length and head length together to determine the original
+	 * start of the request to copy the request for deferal; so it's
+	 * easier on the server if we adjust head and tail length in tandem.
+	 * It's not really a problem that we don't fool with the page and
+	 * tail lengths, though--at worst badly formed xdr might lead the
+	 * server to attempt to parse the padding.
+	 * XXX: Document all these weird requirements for gss mechanism
+	 * wrap/unwrap functions. */
+	if (pad > blocksize)
+		return -EINVAL;
+	if (buf->len > pad)
+		buf->len -= pad;
+	else
+		return -EINVAL;
+	return 0;
+}
+
+static inline void
+make_confounder(char *p, int blocksize)
+{
+	static u64 i = 0;
+	u64 *q = (u64 *)p;
+
+	/* rfc1964 claims this should be "random".  But all that's really
+	 * necessary is that it be unique.  And not even that is necessary in
+	 * our case since our "gssapi" implementation exists only to support
+	 * rpcsec_gss, so we know that the only buffers we will ever encrypt
+	 * already begin with a unique sequence number.  Just to hedge my bets
+	 * I'll make a half-hearted attempt at something unique, but ensuring
+	 * uniqueness would mean worrying about atomicity and rollover, and I
+	 * don't care enough. */
+
+	BUG_ON(blocksize != 8);
+	*q = i++;
+}
+
+/* Assumptions: the head and tail of inbuf are ours to play with.
+ * The pages, however, may be real pages in the page cache and we replace
+ * them with scratch pages from **pages before writing to them. */
+/* XXX: obviously the above should be documentation of wrap interface,
+ * and shouldn't be in this kerberos-specific file. */
+
+/* XXX factor out common code with seal/unseal. */
+
+u32
+gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
+		struct xdr_buf *buf, struct page **pages)
+{
+	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+	s32			checksum_type;
+	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
+	int			blocksize = 0, plainlen;
+	unsigned char		*ptr, *krb5_hdr, *msg_start;
+	s32			now;
+	int			headlen;
+	struct page		**tmp_pages;
+
+	dprintk("RPC: gss_wrap_kerberos\n");
+
+	now = get_seconds();
+
+	switch (kctx->signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		checksum_type = CKSUMTYPE_RSA_MD5;
+		break;
+	default:
+		dprintk("RPC: gss_krb5_seal: kctx->signalg %d not"
+			" supported\n", kctx->signalg);
+		goto out_err;
+	}
+	if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) {
+		dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n",
+			kctx->sealalg);
+		goto out_err;
+	}
+
+	blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+	gss_krb5_add_padding(buf, offset, blocksize);
+	BUG_ON((buf->len - offset) % blocksize);
+	plainlen = blocksize + buf->len - offset;
+
+	headlen = g_token_size(&kctx->mech_used, 22 + plainlen) -
+						(buf->len - offset);
+
+	ptr = buf->head[0].iov_base + offset;
+	/* shift data to make room for header. */
+	/* XXX Would be cleverer to encrypt while copying. */
+	/* XXX bounds checking, slack, etc. */
+	memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
+	buf->head[0].iov_len += headlen;
+	buf->len += headlen;
+	BUG_ON((buf->len - offset - headlen) % blocksize);
+
+	g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr);
+
+
+	*ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff);
+	*ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff);
+
+	/* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
+	krb5_hdr = ptr - 2;
+	msg_start = krb5_hdr + 24;
+	/* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
+
+	*(u16 *)(krb5_hdr + 2) = htons(kctx->signalg);
+	memset(krb5_hdr + 4, 0xff, 4);
+	*(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
+
+	make_confounder(msg_start, blocksize);
+
+	/* XXXJBF: UGH!: */
+	tmp_pages = buf->pages;
+	buf->pages = pages;
+	if (make_checksum(checksum_type, krb5_hdr, 8, buf,
+				offset + headlen - blocksize, &md5cksum))
+		goto out_err;
+	buf->pages = tmp_pages;
+
+	switch (kctx->signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+				  md5cksum.data, md5cksum.len))
+			goto out_err;
+		memcpy(krb5_hdr + 16,
+		       md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
+		       KRB5_CKSUM_LENGTH);
+
+		dprintk("RPC: make_seal_token: cksum data: \n");
+		print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
+		break;
+	default:
+		BUG();
+	}
+
+	kfree(md5cksum.data);
+
+	/* XXX would probably be more efficient to compute checksum
+	 * and encrypt at the same time: */
+	if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
+			       kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+		goto out_err;
+
+	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
+									pages))
+		goto out_err;
+
+	kctx->seq_send++;
+
+	return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
+out_err:
+	if (md5cksum.data) kfree(md5cksum.data);
+	return GSS_S_FAILURE;
+}
+
+u32
+gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
+{
+	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
+	int			signalg;
+	int			sealalg;
+	s32			checksum_type;
+	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
+	s32			now;
+	int			direction;
+	s32			seqnum;
+	unsigned char		*ptr;
+	int			bodysize;
+	u32			ret = GSS_S_DEFECTIVE_TOKEN;
+	void			*data_start, *orig_start;
+	int			data_len;
+	int			blocksize;
+
+	dprintk("RPC: gss_unwrap_kerberos\n");
+
+	ptr = (u8 *)buf->head[0].iov_base + offset;
+	if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
+					buf->len - offset))
+		goto out;
+
+	if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
+	    (*ptr++ !=  (KG_TOK_WRAP_MSG    &0xff))   )
+		goto out;
+
+	/* XXX sanity-check bodysize?? */
+
+	/* get the sign and seal algorithms */
+
+	signalg = ptr[0] + (ptr[1] << 8);
+	sealalg = ptr[2] + (ptr[3] << 8);
+
+	/* Sanity checks */
+
+	if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
+		goto out;
+
+	if (sealalg == 0xffff)
+		goto out;
+
+	/* in the current spec, there is only one valid seal algorithm per
+	   key type, so a simple comparison is ok */
+
+	if (sealalg != kctx->sealalg)
+		goto out;
+
+	/* there are several mappings of seal algorithms to sign algorithms,
+	   but few enough that we can try them all. */
+
+	if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
+	    (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
+	    (kctx->sealalg == SEAL_ALG_DES3KD &&
+	     signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
+		goto out;
+
+	if (gss_decrypt_xdr_buf(kctx->enc, buf,
+			ptr + 22 - (unsigned char *)buf->head[0].iov_base))
+		goto out;
+
+	/* compute the checksum of the message */
+
+	/* initialize the checksum */
+	switch (signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		checksum_type = CKSUMTYPE_RSA_MD5;
+		break;
+	default:
+		ret = GSS_S_DEFECTIVE_TOKEN;
+		goto out;
+	}
+
+	switch (signalg) {
+	case SGN_ALG_DES_MAC_MD5:
+		ret = make_checksum(checksum_type, ptr - 2, 8, buf,
+			 ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum);
+		if (ret)
+			goto out;
+
+		ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+				   md5cksum.data, md5cksum.len);
+		if (ret)
+			goto out;
+
+		if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
+			ret = GSS_S_BAD_SIG;
+			goto out;
+		}
+		break;
+	default:
+		ret = GSS_S_DEFECTIVE_TOKEN;
+		goto out;
+	}
+
+	/* it got through unscathed.  Make sure the context is unexpired */
+
+	now = get_seconds();
+
+	ret = GSS_S_CONTEXT_EXPIRED;
+	if (now > kctx->endtime)
+		goto out;
+
+	/* do sequencing checks */
+
+	ret = GSS_S_BAD_SIG;
+	if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
+				    &seqnum)))
+		goto out;
+
+	if ((kctx->initiate && direction != 0xff) ||
+	    (!kctx->initiate && direction != 0))
+		goto out;
+
+	/* Copy the data back to the right position.  XXX: Would probably be
+	 * better to copy and encrypt at the same time. */
+
+	blocksize = crypto_tfm_alg_blocksize(kctx->enc);
+	data_start = ptr + 22 + blocksize;
+	orig_start = buf->head[0].iov_base + offset;
+	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
+	memmove(orig_start, data_start, data_len);
+	buf->head[0].iov_len -= (data_start - orig_start);
+	buf->len -= (data_start - orig_start);
+
+	ret = GSS_S_DEFECTIVE_TOKEN;
+	if (gss_krb5_remove_padding(buf, blocksize))
+		goto out;
+
+	ret = GSS_S_COMPLETE;
+out:
+	if (md5cksum.data) kfree(md5cksum.data);
+	return ret;
+}
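
The krb5 padding convention used by this new file is self-describing: every pad byte holds the pad length (1..blocksize), so gss_unwrap_kerberos() can read the last plaintext byte to learn how much to strip. A flat-buffer sketch of the round trip; the real code has to walk head/pages/tail and kmap pages, which is elided here:

#include <stdio.h>
#include <string.h>

#define BLOCKSIZE 8

/* Pad as gss_krb5_add_padding() does: 8 - (len & 7) bytes, each
 * holding the pad length, giving a block-aligned total. */
static int add_padding(unsigned char *buf, int len)
{
        int pad = BLOCKSIZE - (len & (BLOCKSIZE - 1));

        memset(buf + len, pad, pad);
        return len + pad;
}

/* Strip as gss_krb5_remove_padding() does: trust the final byte,
 * after sanity-checking it against the block size. */
static int remove_padding(unsigned char *buf, int len)
{
        int pad = buf[len - 1];

        if (pad < 1 || pad > BLOCKSIZE || pad > len)
                return -1;   /* defective token */
        return len - pad;
}

int main(void)
{
        unsigned char buf[32] = "hello";
        int padded = add_padding(buf, 5);

        printf("5 -> padded %d -> unpadded %d\n",
               padded, remove_padding(buf, padded));
        return 0;
}
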
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 9dfb68377d69..b048bf672da2 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -35,7 +35,6 @@
 
 #include <linux/types.h>
 #include <linux/slab.h>
-#include <linux/socket.h>
 #include <linux/module.h>
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/gss_asn1.h>
@@ -251,13 +250,11 @@ gss_import_sec_context(const void *input_token, size_t bufsize,
 
 u32
 gss_get_mic(struct gss_ctx	*context_handle,
-	    u32			qop,
	    struct xdr_buf	*message,
	    struct xdr_netobj	*mic_token)
 {
	 return context_handle->mech_type->gm_ops
		->gss_get_mic(context_handle,
-			      qop,
			      message,
			      mic_token);
 }
@@ -267,16 +264,34 @@ gss_get_mic(struct gss_ctx *context_handle,
 u32
 gss_verify_mic(struct gss_ctx		*context_handle,
	       struct xdr_buf		*message,
-	       struct xdr_netobj	*mic_token,
-	       u32			*qstate)
+	       struct xdr_netobj	*mic_token)
 {
	return context_handle->mech_type->gm_ops
		->gss_verify_mic(context_handle,
				 message,
-				 mic_token,
-				 qstate);
+				 mic_token);
 }
 
+u32
+gss_wrap(struct gss_ctx	*ctx_id,
+	 int		offset,
+	 struct xdr_buf	*buf,
+	 struct page	**inpages)
+{
+	return ctx_id->mech_type->gm_ops
+		->gss_wrap(ctx_id, offset, buf, inpages);
+}
+
+u32
+gss_unwrap(struct gss_ctx	*ctx_id,
+	   int			offset,
+	   struct xdr_buf	*buf)
+{
+	return ctx_id->mech_type->gm_ops
+		->gss_unwrap(ctx_id, offset, buf);
+}
+
+
 /* gss_delete_sec_context: free all resources associated with context_handle.
  * Note this differs from the RFC 2744-specified prototype in that we don't
  * bother returning an output token, since it would never be used anyway. */
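
The new gss_wrap()/gss_unwrap() entry points are thin trampolines through the per-mechanism gm_ops table, the same vtable pattern gss_get_mic() already uses, so krb5, spkm3, or any future mechanism plugs in by filling two more function pointers. A toy user-space sketch of the pattern; the types and names below are invented stand-ins, not the kernel's:

#include <stdio.h>

/* Simplified stand-ins for gss_api_ops / gss_ctx. */
struct toy_ops {
        int (*wrap)(void *internal, int offset);
        int (*unwrap)(void *internal, int offset);
};

struct toy_ctx {
        const struct toy_ops *ops;   /* plays the role of gm_ops */
        void *internal;              /* mechanism-private state */
};

static int krb5_wrap(void *internal, int offset)
{
        (void)internal;
        printf("krb5 wrap at offset %d\n", offset);
        return 0;
}

static int krb5_unwrap(void *internal, int offset)
{
        (void)internal;
        printf("krb5 unwrap at offset %d\n", offset);
        return 0;
}

static const struct toy_ops krb5_ops = { krb5_wrap, krb5_unwrap };

/* The generic entry point just dispatches, like gss_wrap() above. */
static int toy_wrap(struct toy_ctx *c, int offset)
{
        return c->ops->wrap(c->internal, offset);
}

int main(void)
{
        struct toy_ctx c = { &krb5_ops, NULL };

        return toy_wrap(&c, 12);
}
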
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 6c97d61baa9b..39b3edc14694 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c | |||
| @@ -224,18 +224,13 @@ gss_delete_sec_context_spkm3(void *internal_ctx) { | |||
| 224 | static u32 | 224 | static u32 |
| 225 | gss_verify_mic_spkm3(struct gss_ctx *ctx, | 225 | gss_verify_mic_spkm3(struct gss_ctx *ctx, |
| 226 | struct xdr_buf *signbuf, | 226 | struct xdr_buf *signbuf, |
| 227 | struct xdr_netobj *checksum, | 227 | struct xdr_netobj *checksum) |
| 228 | u32 *qstate) { | 228 | { |
| 229 | u32 maj_stat = 0; | 229 | u32 maj_stat = 0; |
| 230 | int qop_state = 0; | ||
| 231 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; | 230 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; |
| 232 | 231 | ||
| 233 | dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); | 232 | dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); |
| 234 | maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state, | 233 | maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); |
| 235 | SPKM_MIC_TOK); | ||
| 236 | |||
| 237 | if (!maj_stat && qop_state) | ||
| 238 | *qstate = qop_state; | ||
| 239 | 234 | ||
| 240 | dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); | 235 | dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); |
| 241 | return maj_stat; | 236 | return maj_stat; |
| @@ -243,15 +238,15 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx, | |||
| 243 | 238 | ||
| 244 | static u32 | 239 | static u32 |
| 245 | gss_get_mic_spkm3(struct gss_ctx *ctx, | 240 | gss_get_mic_spkm3(struct gss_ctx *ctx, |
| 246 | u32 qop, | ||
| 247 | struct xdr_buf *message_buffer, | 241 | struct xdr_buf *message_buffer, |
| 248 | struct xdr_netobj *message_token) { | 242 | struct xdr_netobj *message_token) |
| 243 | { | ||
| 249 | u32 err = 0; | 244 | u32 err = 0; |
| 250 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; | 245 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; |
| 251 | 246 | ||
| 252 | dprintk("RPC: gss_get_mic_spkm3\n"); | 247 | dprintk("RPC: gss_get_mic_spkm3\n"); |
| 253 | 248 | ||
| 254 | err = spkm3_make_token(sctx, qop, message_buffer, | 249 | err = spkm3_make_token(sctx, message_buffer, |
| 255 | message_token, SPKM_MIC_TOK); | 250 | message_token, SPKM_MIC_TOK); |
| 256 | return err; | 251 | return err; |
| 257 | } | 252 | } |
| @@ -264,8 +259,8 @@ static struct gss_api_ops gss_spkm3_ops = { | |||
| 264 | }; | 259 | }; |
| 265 | 260 | ||
| 266 | static struct pf_desc gss_spkm3_pfs[] = { | 261 | static struct pf_desc gss_spkm3_pfs[] = { |
| 267 | {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"}, | 262 | {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"}, |
| 268 | {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, | 263 | {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, |
| 269 | }; | 264 | }; |
| 270 | 265 | ||
| 271 | static struct gss_api_mech gss_spkm3_mech = { | 266 | static struct gss_api_mech gss_spkm3_mech = { |
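The pseudoflavor table loses its QOP column too, leaving each entry a (pseudoflavor, service, name) tuple. A hedged sketch of scanning such a table; the helper is hypothetical and the pf_desc field names are assumed from the positional initializers above:

	static u32 spkm3_service_for(u32 pseudoflavor)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(gss_spkm3_pfs); i++)
			if (gss_spkm3_pfs[i].pseudoflavor == pseudoflavor)
				return gss_spkm3_pfs[i].service;
		return RPC_GSS_SVC_NONE;	/* not registered */
	}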
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 25339868d462..148201e929d0 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c | |||
| @@ -51,7 +51,7 @@ | |||
| 51 | */ | 51 | */ |
| 52 | 52 | ||
| 53 | u32 | 53 | u32 |
| 54 | spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, | 54 | spkm3_make_token(struct spkm3_ctx *ctx, |
| 55 | struct xdr_buf * text, struct xdr_netobj * token, | 55 | struct xdr_buf * text, struct xdr_netobj * token, |
| 56 | int toktype) | 56 | int toktype) |
| 57 | { | 57 | { |
| @@ -68,8 +68,6 @@ spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, | |||
| 68 | dprintk("RPC: spkm3_make_token\n"); | 68 | dprintk("RPC: spkm3_make_token\n"); |
| 69 | 69 | ||
| 70 | now = jiffies; | 70 | now = jiffies; |
| 71 | if (qop_req != 0) | ||
| 72 | goto out_err; | ||
| 73 | 71 | ||
| 74 | if (ctx->ctx_id.len != 16) { | 72 | if (ctx->ctx_id.len != 16) { |
| 75 | dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", | 73 | dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", |
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 65ce81bf0bc4..c3c0d9586103 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c | |||
| @@ -52,7 +52,7 @@ u32 | |||
| 52 | spkm3_read_token(struct spkm3_ctx *ctx, | 52 | spkm3_read_token(struct spkm3_ctx *ctx, |
| 53 | struct xdr_netobj *read_token, /* checksum */ | 53 | struct xdr_netobj *read_token, /* checksum */ |
| 54 | struct xdr_buf *message_buffer, /* signbuf */ | 54 | struct xdr_buf *message_buffer, /* signbuf */ |
| 55 | int *qop_state, int toktype) | 55 | int toktype) |
| 56 | { | 56 | { |
| 57 | s32 code; | 57 | s32 code; |
| 58 | struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; | 58 | struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; |
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index e3308195374e..e4ada15ed856 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
| @@ -566,8 +566,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, | |||
| 566 | 566 | ||
| 567 | if (rqstp->rq_deferred) /* skip verification of revisited request */ | 567 | if (rqstp->rq_deferred) /* skip verification of revisited request */ |
| 568 | return SVC_OK; | 568 | return SVC_OK; |
| 569 | if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) | 569 | if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) { |
| 570 | != GSS_S_COMPLETE) { | ||
| 571 | *authp = rpcsec_gsserr_credproblem; | 570 | *authp = rpcsec_gsserr_credproblem; |
| 572 | return SVC_DENIED; | 571 | return SVC_DENIED; |
| 573 | } | 572 | } |
| @@ -604,7 +603,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) | |||
| 604 | xdr_buf_from_iov(&iov, &verf_data); | 603 | xdr_buf_from_iov(&iov, &verf_data); |
| 605 | p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; | 604 | p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; |
| 606 | mic.data = (u8 *)(p + 1); | 605 | mic.data = (u8 *)(p + 1); |
| 607 | maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); | 606 | maj_stat = gss_get_mic(ctx_id, &verf_data, &mic); |
| 608 | if (maj_stat != GSS_S_COMPLETE) | 607 | if (maj_stat != GSS_S_COMPLETE) |
| 609 | return -1; | 608 | return -1; |
| 610 | *p++ = htonl(mic.len); | 609 | *p++ = htonl(mic.len); |
| @@ -710,7 +709,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) | |||
| 710 | goto out; | 709 | goto out; |
| 711 | if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) | 710 | if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) |
| 712 | goto out; | 711 | goto out; |
| 713 | maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); | 712 | maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); |
| 714 | if (maj_stat != GSS_S_COMPLETE) | 713 | if (maj_stat != GSS_S_COMPLETE) |
| 715 | goto out; | 714 | goto out; |
| 716 | if (ntohl(svc_getu32(&buf->head[0])) != seq) | 715 | if (ntohl(svc_getu32(&buf->head[0])) != seq) |
| @@ -1012,7 +1011,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) | |||
| 1012 | resv = &resbuf->tail[0]; | 1011 | resv = &resbuf->tail[0]; |
| 1013 | } | 1012 | } |
| 1014 | mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; | 1013 | mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; |
| 1015 | if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) | 1014 | if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) |
| 1016 | goto out_err; | 1015 | goto out_err; |
| 1017 | svc_putu32(resv, htonl(mic.len)); | 1016 | svc_putu32(resv, htonl(mic.len)); |
| 1018 | memset(mic.data + mic.len, 0, | 1017 | memset(mic.data + mic.len, 0, |
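Every server-side call site shrinks the same way. A minimal sketch of signing a kvec with the two-argument gss_get_mic(), mirroring gss_write_verf() above; the caller is assumed to have already reserved room for mic->data:

	static int sign_iov(struct gss_ctx *ctx, struct kvec *iov,
			    struct xdr_netobj *mic)
	{
		struct xdr_buf verf_data;

		xdr_buf_from_iov(iov, &verf_data);
		if (gss_get_mic(ctx, &verf_data, mic) != GSS_S_COMPLETE)
			return -1;
		return 0;	/* mic->len and mic->data describe the checksum */
	}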
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 9b72d3abf823..f56767aaa927 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c | |||
| @@ -7,9 +7,7 @@ | |||
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
| 10 | #include <linux/socket.h> | ||
| 11 | #include <linux/module.h> | 10 | #include <linux/module.h> |
| 12 | #include <linux/in.h> | ||
| 13 | #include <linux/utsname.h> | 11 | #include <linux/utsname.h> |
| 14 | #include <linux/sunrpc/clnt.h> | 12 | #include <linux/sunrpc/clnt.h> |
| 15 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4ff297a9b15b..890fb5ea0dcb 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c | |||
| @@ -9,8 +9,6 @@ | |||
| 9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
| 10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
| 11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 12 | #include <linux/socket.h> | ||
| 13 | #include <linux/in.h> | ||
| 14 | #include <linux/sunrpc/clnt.h> | 12 | #include <linux/sunrpc/clnt.h> |
| 15 | #include <linux/sunrpc/auth.h> | 13 | #include <linux/sunrpc/auth.h> |
| 16 | 14 | ||
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f17e6153b688..702ede309b06 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * linux/net/sunrpc/rpcclnt.c | 2 | * linux/net/sunrpc/clnt.c |
| 3 | * | 3 | * |
| 4 | * This file contains the high-level RPC interface. | 4 | * This file contains the high-level RPC interface. |
| 5 | * It is modeled as a finite state machine to support both synchronous | 5 | * It is modeled as a finite state machine to support both synchronous |
| @@ -27,7 +27,6 @@ | |||
| 27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
| 28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <linux/in.h> | ||
| 31 | #include <linux/utsname.h> | 30 | #include <linux/utsname.h> |
| 32 | 31 | ||
| 33 | #include <linux/sunrpc/clnt.h> | 32 | #include <linux/sunrpc/clnt.h> |
| @@ -53,6 +52,7 @@ static void call_allocate(struct rpc_task *task); | |||
| 53 | static void call_encode(struct rpc_task *task); | 52 | static void call_encode(struct rpc_task *task); |
| 54 | static void call_decode(struct rpc_task *task); | 53 | static void call_decode(struct rpc_task *task); |
| 55 | static void call_bind(struct rpc_task *task); | 54 | static void call_bind(struct rpc_task *task); |
| 55 | static void call_bind_status(struct rpc_task *task); | ||
| 56 | static void call_transmit(struct rpc_task *task); | 56 | static void call_transmit(struct rpc_task *task); |
| 57 | static void call_status(struct rpc_task *task); | 57 | static void call_status(struct rpc_task *task); |
| 58 | static void call_refresh(struct rpc_task *task); | 58 | static void call_refresh(struct rpc_task *task); |
| @@ -517,15 +517,8 @@ void | |||
| 517 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) | 517 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) |
| 518 | { | 518 | { |
| 519 | struct rpc_xprt *xprt = clnt->cl_xprt; | 519 | struct rpc_xprt *xprt = clnt->cl_xprt; |
| 520 | 520 | if (xprt->ops->set_buffer_size) | |
| 521 | xprt->sndsize = 0; | 521 | xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); |
| 522 | if (sndsize) | ||
| 523 | xprt->sndsize = sndsize + RPC_SLACK_SPACE; | ||
| 524 | xprt->rcvsize = 0; | ||
| 525 | if (rcvsize) | ||
| 526 | xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; | ||
| 527 | if (xprt_connected(xprt)) | ||
| 528 | xprt_sock_setbufsize(xprt); | ||
| 529 | } | 522 | } |
| 530 | 523 | ||
| 531 | /* | 524 | /* |
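rpc_setbufsize() no longer pokes at socket internals: buffer sizing is a transport method, and transports that cannot usefully resize may leave the hook NULL. A hedged sketch of a datagram-style implementation, reusing the slack adjustment from the deleted generic code as an assumption; the name and parameter types are illustrative:

	static void udp_set_buffer_size(struct rpc_xprt *xprt,
					size_t sndsize, size_t rcvsize)
	{
		xprt->sndsize = 0;
		if (sndsize)
			xprt->sndsize = sndsize + RPC_SLACK_SPACE;
		xprt->rcvsize = 0;
		if (rcvsize)
			xprt->rcvsize = rcvsize + RPC_SLACK_SPACE;
		/* a connected transport would also update the live socket here */
	}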
| @@ -685,13 +678,11 @@ call_allocate(struct rpc_task *task) | |||
| 685 | static void | 678 | static void |
| 686 | call_encode(struct rpc_task *task) | 679 | call_encode(struct rpc_task *task) |
| 687 | { | 680 | { |
| 688 | struct rpc_clnt *clnt = task->tk_client; | ||
| 689 | struct rpc_rqst *req = task->tk_rqstp; | 681 | struct rpc_rqst *req = task->tk_rqstp; |
| 690 | struct xdr_buf *sndbuf = &req->rq_snd_buf; | 682 | struct xdr_buf *sndbuf = &req->rq_snd_buf; |
| 691 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 683 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
| 692 | unsigned int bufsiz; | 684 | unsigned int bufsiz; |
| 693 | kxdrproc_t encode; | 685 | kxdrproc_t encode; |
| 694 | int status; | ||
| 695 | u32 *p; | 686 | u32 *p; |
| 696 | 687 | ||
| 697 | dprintk("RPC: %4d call_encode (status %d)\n", | 688 | dprintk("RPC: %4d call_encode (status %d)\n", |
| @@ -719,11 +710,15 @@ call_encode(struct rpc_task *task) | |||
| 719 | rpc_exit(task, -EIO); | 710 | rpc_exit(task, -EIO); |
| 720 | return; | 711 | return; |
| 721 | } | 712 | } |
| 722 | if (encode && (status = rpcauth_wrap_req(task, encode, req, p, | 713 | if (encode == NULL) |
| 723 | task->tk_msg.rpc_argp)) < 0) { | 714 | return; |
| 724 | printk(KERN_WARNING "%s: can't encode arguments: %d\n", | 715 | |
| 725 | clnt->cl_protname, -status); | 716 | task->tk_status = rpcauth_wrap_req(task, encode, req, p, |
| 726 | rpc_exit(task, status); | 717 | task->tk_msg.rpc_argp); |
| 718 | if (task->tk_status == -ENOMEM) { | ||
| 719 | /* XXX: Is this sane? */ | ||
| 720 | rpc_delay(task, 3*HZ); | ||
| 721 | task->tk_status = -EAGAIN; | ||
| 727 | } | 722 | } |
| 728 | } | 723 | } |
| 729 | 724 | ||
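The notable change above: an -ENOMEM from the auth layer no longer kills the call. The pattern in isolation (rpc_delay() parks the task for the given ticks, and the -EAGAIN status sends the state machine back around for another encode attempt); this standalone helper is illustrative only:

	static void defer_and_retry(struct rpc_task *task)
	{
		rpc_delay(task, 3 * HZ);	/* sleep ~3s, then rerun tk_action */
		task->tk_status = -EAGAIN;	/* retryable, not fatal */
	}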
| @@ -734,43 +729,95 @@ static void | |||
| 734 | call_bind(struct rpc_task *task) | 729 | call_bind(struct rpc_task *task) |
| 735 | { | 730 | { |
| 736 | struct rpc_clnt *clnt = task->tk_client; | 731 | struct rpc_clnt *clnt = task->tk_client; |
| 737 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
| 738 | |||
| 739 | dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid, | ||
| 740 | xprt, (xprt_connected(xprt) ? "is" : "is not")); | ||
| 741 | 732 | ||
| 742 | task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect; | 733 | dprintk("RPC: %4d call_bind (status %d)\n", |
| 734 | task->tk_pid, task->tk_status); | ||
| 743 | 735 | ||
| 736 | task->tk_action = call_connect; | ||
| 744 | if (!clnt->cl_port) { | 737 | if (!clnt->cl_port) { |
| 745 | task->tk_action = call_connect; | 738 | task->tk_action = call_bind_status; |
| 746 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | 739 | task->tk_timeout = task->tk_xprt->bind_timeout; |
| 747 | rpc_getport(task, clnt); | 740 | rpc_getport(task, clnt); |
| 748 | } | 741 | } |
| 749 | } | 742 | } |
| 750 | 743 | ||
| 751 | /* | 744 | /* |
| 752 | * 4a. Connect to the RPC server (TCP case) | 745 | * 4a. Sort out bind result |
| 746 | */ | ||
| 747 | static void | ||
| 748 | call_bind_status(struct rpc_task *task) | ||
| 749 | { | ||
| 750 | int status = -EACCES; | ||
| 751 | |||
| 752 | if (task->tk_status >= 0) { | ||
| 753 | dprintk("RPC: %4d call_bind_status (status %d)\n", | ||
| 754 | task->tk_pid, task->tk_status); | ||
| 755 | task->tk_status = 0; | ||
| 756 | task->tk_action = call_connect; | ||
| 757 | return; | ||
| 758 | } | ||
| 759 | |||
| 760 | switch (task->tk_status) { | ||
| 761 | case -EACCES: | ||
| 762 | dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", | ||
| 763 | task->tk_pid); | ||
| 764 | rpc_delay(task, 3*HZ); | ||
| 765 | goto retry_bind; | ||
| 766 | case -ETIMEDOUT: | ||
| 767 | dprintk("RPC: %4d rpcbind request timed out\n", | ||
| 768 | task->tk_pid); | ||
| 769 | if (RPC_IS_SOFT(task)) { | ||
| 770 | status = -EIO; | ||
| 771 | break; | ||
| 772 | } | ||
| 773 | goto retry_bind; | ||
| 774 | case -EPFNOSUPPORT: | ||
| 775 | dprintk("RPC: %4d remote rpcbind service unavailable\n", | ||
| 776 | task->tk_pid); | ||
| 777 | break; | ||
| 778 | case -EPROTONOSUPPORT: | ||
| 779 | dprintk("RPC: %4d remote rpcbind version 2 unavailable\n", | ||
| 780 | task->tk_pid); | ||
| 781 | break; | ||
| 782 | default: | ||
| 783 | dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", | ||
| 784 | task->tk_pid, -task->tk_status); | ||
| 785 | status = -EIO; | ||
| 786 | break; | ||
| 787 | } | ||
| 788 | |||
| 789 | rpc_exit(task, status); | ||
| 790 | return; | ||
| 791 | |||
| 792 | retry_bind: | ||
| 793 | task->tk_status = 0; | ||
| 794 | task->tk_action = call_bind; | ||
| 795 | return; | ||
| 796 | } | ||
| 797 | |||
| 798 | /* | ||
| 799 | * 4b. Connect to the RPC server | ||
| 753 | */ | 800 | */ |
| 754 | static void | 801 | static void |
| 755 | call_connect(struct rpc_task *task) | 802 | call_connect(struct rpc_task *task) |
| 756 | { | 803 | { |
| 757 | struct rpc_clnt *clnt = task->tk_client; | 804 | struct rpc_xprt *xprt = task->tk_xprt; |
| 758 | 805 | ||
| 759 | dprintk("RPC: %4d call_connect status %d\n", | 806 | dprintk("RPC: %4d call_connect xprt %p %s connected\n", |
| 760 | task->tk_pid, task->tk_status); | 807 | task->tk_pid, xprt, |
| 808 | (xprt_connected(xprt) ? "is" : "is not")); | ||
| 761 | 809 | ||
| 762 | if (xprt_connected(clnt->cl_xprt)) { | 810 | task->tk_action = call_transmit; |
| 763 | task->tk_action = call_transmit; | 811 | if (!xprt_connected(xprt)) { |
| 764 | return; | 812 | task->tk_action = call_connect_status; |
| 813 | if (task->tk_status < 0) | ||
| 814 | return; | ||
| 815 | xprt_connect(task); | ||
| 765 | } | 816 | } |
| 766 | task->tk_action = call_connect_status; | ||
| 767 | if (task->tk_status < 0) | ||
| 768 | return; | ||
| 769 | xprt_connect(task); | ||
| 770 | } | 817 | } |
| 771 | 818 | ||
| 772 | /* | 819 | /* |
| 773 | * 4b. Sort out connect result | 820 | * 4c. Sort out connect result |
| 774 | */ | 821 | */ |
| 775 | static void | 822 | static void |
| 776 | call_connect_status(struct rpc_task *task) | 823 | call_connect_status(struct rpc_task *task) |
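Note the RPC_IS_SOFT() branch above: only a soft task turns an rpcbind timeout into -EIO; a hard task keeps cycling through retry_bind. A hedged sketch of issuing a soft call; the zero-filled procedure 0 is illustrative (its NULL encode is tolerated by the call_encode change above):

	static int ping_soft(struct rpc_clnt *clnt)
	{
		static struct rpc_procinfo null_proc;	/* NULLPROC, no args */
		struct rpc_message msg = { .rpc_proc = &null_proc };

		/* RPC_TASK_SOFT: time out with an error instead of retrying */
		return rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
	}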
| @@ -778,6 +825,9 @@ call_connect_status(struct rpc_task *task) | |||
| 778 | struct rpc_clnt *clnt = task->tk_client; | 825 | struct rpc_clnt *clnt = task->tk_client; |
| 779 | int status = task->tk_status; | 826 | int status = task->tk_status; |
| 780 | 827 | ||
| 828 | dprintk("RPC: %5u call_connect_status (status %d)\n", | ||
| 829 | task->tk_pid, task->tk_status); | ||
| 830 | |||
| 781 | task->tk_status = 0; | 831 | task->tk_status = 0; |
| 782 | if (status >= 0) { | 832 | if (status >= 0) { |
| 783 | clnt->cl_stats->netreconn++; | 833 | clnt->cl_stats->netreconn++; |
| @@ -785,17 +835,19 @@ call_connect_status(struct rpc_task *task) | |||
| 785 | return; | 835 | return; |
| 786 | } | 836 | } |
| 787 | 837 | ||
| 788 | /* Something failed: we may have to rebind */ | 838 | /* Something failed: remote service port may have changed */ |
| 789 | if (clnt->cl_autobind) | 839 | if (clnt->cl_autobind) |
| 790 | clnt->cl_port = 0; | 840 | clnt->cl_port = 0; |
| 841 | |||
| 791 | switch (status) { | 842 | switch (status) { |
| 792 | case -ENOTCONN: | 843 | case -ENOTCONN: |
| 793 | case -ETIMEDOUT: | 844 | case -ETIMEDOUT: |
| 794 | case -EAGAIN: | 845 | case -EAGAIN: |
| 795 | task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect; | 846 | task->tk_action = call_bind; |
| 796 | break; | 847 | break; |
| 797 | default: | 848 | default: |
| 798 | rpc_exit(task, -EIO); | 849 | rpc_exit(task, -EIO); |
| 850 | break; | ||
| 799 | } | 851 | } |
| 800 | } | 852 | } |
| 801 | 853 | ||
| @@ -815,10 +867,12 @@ call_transmit(struct rpc_task *task) | |||
| 815 | if (task->tk_status != 0) | 867 | if (task->tk_status != 0) |
| 816 | return; | 868 | return; |
| 817 | /* Encode here so that rpcsec_gss can use correct sequence number. */ | 869 | /* Encode here so that rpcsec_gss can use correct sequence number. */ |
| 818 | if (!task->tk_rqstp->rq_bytes_sent) | 870 | if (task->tk_rqstp->rq_bytes_sent == 0) { |
| 819 | call_encode(task); | 871 | call_encode(task); |
| 820 | if (task->tk_status < 0) | 872 | /* Did the encode result in an error condition? */ |
| 821 | return; | 873 | if (task->tk_status != 0) |
| 874 | goto out_nosend; | ||
| 875 | } | ||
| 822 | xprt_transmit(task); | 876 | xprt_transmit(task); |
| 823 | if (task->tk_status < 0) | 877 | if (task->tk_status < 0) |
| 824 | return; | 878 | return; |
| @@ -826,6 +880,10 @@ call_transmit(struct rpc_task *task) | |||
| 826 | task->tk_action = NULL; | 880 | task->tk_action = NULL; |
| 827 | rpc_wake_up_task(task); | 881 | rpc_wake_up_task(task); |
| 828 | } | 882 | } |
| 883 | return; | ||
| 884 | out_nosend: | ||
| 885 | /* release socket write lock before attempting to handle error */ | ||
| 886 | xprt_abort_transmit(task); | ||
| 829 | } | 887 | } |
| 830 | 888 | ||
| 831 | /* | 889 | /* |
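Because call_transmit() holds the transport write lock while encoding, a failed encode must hand that lock back before the error is processed. A guess at what xprt_abort_transmit() amounts to; the release_xprt method name is an assumption, not shown in this patch:

	void xprt_abort_transmit(struct rpc_task *task)
	{
		struct rpc_xprt *xprt = task->tk_xprt;

		/* drop XPRT_LOCKED and wake the next waiting sender */
		xprt->ops->release_xprt(xprt, task);
	}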
| @@ -1020,13 +1078,12 @@ static u32 * | |||
| 1020 | call_header(struct rpc_task *task) | 1078 | call_header(struct rpc_task *task) |
| 1021 | { | 1079 | { |
| 1022 | struct rpc_clnt *clnt = task->tk_client; | 1080 | struct rpc_clnt *clnt = task->tk_client; |
| 1023 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
| 1024 | struct rpc_rqst *req = task->tk_rqstp; | 1081 | struct rpc_rqst *req = task->tk_rqstp; |
| 1025 | u32 *p = req->rq_svec[0].iov_base; | 1082 | u32 *p = req->rq_svec[0].iov_base; |
| 1026 | 1083 | ||
| 1027 | /* FIXME: check buffer size? */ | 1084 | /* FIXME: check buffer size? */ |
| 1028 | if (xprt->stream) | 1085 | |
| 1029 | *p++ = 0; /* fill in later */ | 1086 | p = xprt_skip_transport_header(task->tk_xprt, p); |
| 1030 | *p++ = req->rq_xid; /* XID */ | 1087 | *p++ = req->rq_xid; /* XID */ |
| 1031 | *p++ = htonl(RPC_CALL); /* CALL */ | 1088 | *p++ = htonl(RPC_CALL); /* CALL */ |
| 1032 | *p++ = htonl(RPC_VERSION); /* RPC version */ | 1089 | *p++ = htonl(RPC_VERSION); /* RPC version */ |
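call_header() now asks the transport how much framing room it needs rather than testing xprt->stream. A plausible sketch of xprt_skip_transport_header(), assuming a per-transport header size kept in 32-bit words (the tsh_size-style field is an assumption):

	static inline u32 *skip_transport_header(struct rpc_xprt *xprt, u32 *p)
	{
		/* e.g. one word for TCP's record marker, zero for UDP */
		return p + xprt->tsh_size;
	}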
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 4e81f2766923..a398575f94b8 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c | |||
| @@ -26,7 +26,7 @@ | |||
| 26 | #define PMAP_GETPORT 3 | 26 | #define PMAP_GETPORT 3 |
| 27 | 27 | ||
| 28 | static struct rpc_procinfo pmap_procedures[]; | 28 | static struct rpc_procinfo pmap_procedures[]; |
| 29 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); | 29 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); |
| 30 | static void pmap_getport_done(struct rpc_task *); | 30 | static void pmap_getport_done(struct rpc_task *); |
| 31 | static struct rpc_program pmap_program; | 31 | static struct rpc_program pmap_program; |
| 32 | static DEFINE_SPINLOCK(pmap_lock); | 32 | static DEFINE_SPINLOCK(pmap_lock); |
| @@ -65,7 +65,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) | |||
| 65 | map->pm_binding = 1; | 65 | map->pm_binding = 1; |
| 66 | spin_unlock(&pmap_lock); | 66 | spin_unlock(&pmap_lock); |
| 67 | 67 | ||
| 68 | pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot); | 68 | pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); |
| 69 | if (IS_ERR(pmap_clnt)) { | 69 | if (IS_ERR(pmap_clnt)) { |
| 70 | task->tk_status = PTR_ERR(pmap_clnt); | 70 | task->tk_status = PTR_ERR(pmap_clnt); |
| 71 | goto bailout; | 71 | goto bailout; |
| @@ -112,7 +112,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) | |||
| 112 | NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); | 112 | NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); |
| 113 | 113 | ||
| 114 | sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); | 114 | sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); |
| 115 | pmap_clnt = pmap_create(hostname, sin, prot); | 115 | pmap_clnt = pmap_create(hostname, sin, prot, 0); |
| 116 | if (IS_ERR(pmap_clnt)) | 116 | if (IS_ERR(pmap_clnt)) |
| 117 | return PTR_ERR(pmap_clnt); | 117 | return PTR_ERR(pmap_clnt); |
| 118 | 118 | ||
| @@ -171,7 +171,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
| 171 | 171 | ||
| 172 | sin.sin_family = AF_INET; | 172 | sin.sin_family = AF_INET; |
| 173 | sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); | 173 | sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); |
| 174 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP); | 174 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); |
| 175 | if (IS_ERR(pmap_clnt)) { | 175 | if (IS_ERR(pmap_clnt)) { |
| 176 | error = PTR_ERR(pmap_clnt); | 176 | error = PTR_ERR(pmap_clnt); |
| 177 | dprintk("RPC: couldn't create pmap client. Error = %d\n", error); | 177 | dprintk("RPC: couldn't create pmap client. Error = %d\n", error); |
| @@ -198,7 +198,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
| 198 | } | 198 | } |
| 199 | 199 | ||
| 200 | static struct rpc_clnt * | 200 | static struct rpc_clnt * |
| 201 | pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) | 201 | pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) |
| 202 | { | 202 | { |
| 203 | struct rpc_xprt *xprt; | 203 | struct rpc_xprt *xprt; |
| 204 | struct rpc_clnt *clnt; | 204 | struct rpc_clnt *clnt; |
| @@ -208,6 +208,8 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) | |||
| 208 | if (IS_ERR(xprt)) | 208 | if (IS_ERR(xprt)) |
| 209 | return (struct rpc_clnt *)xprt; | 209 | return (struct rpc_clnt *)xprt; |
| 210 | xprt->addr.sin_port = htons(RPC_PMAP_PORT); | 210 | xprt->addr.sin_port = htons(RPC_PMAP_PORT); |
| 211 | if (!privileged) | ||
| 212 | xprt->resvport = 0; | ||
| 211 | 213 | ||
| 212 | /* printk("pmap: create clnt\n"); */ | 214 | /* printk("pmap: create clnt\n"); */ |
| 213 | clnt = rpc_new_client(xprt, hostname, | 215 | clnt = rpc_new_client(xprt, hostname, |
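The effect of the new privileged flag: only rpc_register(), which must prove itself to the local portmapper, keeps a reserved source port; lookups run unprivileged and stop consuming ports below 1024. For instance, a probe like this one no longer ties up a reserved port (the NFS program/version numbers are just an example):

	static int probe_nfsd_port(struct sockaddr_in *sin)
	{
		/* returns the registered port, or a negative errno */
		return rpc_getport_external(sin, 100003, 3, IPPROTO_UDP);
	}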
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ded6c63f11ec..4f188d0a5d11 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
| @@ -76,25 +76,35 @@ int | |||
| 76 | rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) | 76 | rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) |
| 77 | { | 77 | { |
| 78 | struct rpc_inode *rpci = RPC_I(inode); | 78 | struct rpc_inode *rpci = RPC_I(inode); |
| 79 | int res = 0; | 79 | int res = -EPIPE; |
| 80 | 80 | ||
| 81 | down(&inode->i_sem); | 81 | down(&inode->i_sem); |
| 82 | if (rpci->ops == NULL) | ||
| 83 | goto out; | ||
| 82 | if (rpci->nreaders) { | 84 | if (rpci->nreaders) { |
| 83 | list_add_tail(&msg->list, &rpci->pipe); | 85 | list_add_tail(&msg->list, &rpci->pipe); |
| 84 | rpci->pipelen += msg->len; | 86 | rpci->pipelen += msg->len; |
| 87 | res = 0; | ||
| 85 | } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { | 88 | } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { |
| 86 | if (list_empty(&rpci->pipe)) | 89 | if (list_empty(&rpci->pipe)) |
| 87 | schedule_delayed_work(&rpci->queue_timeout, | 90 | schedule_delayed_work(&rpci->queue_timeout, |
| 88 | RPC_UPCALL_TIMEOUT); | 91 | RPC_UPCALL_TIMEOUT); |
| 89 | list_add_tail(&msg->list, &rpci->pipe); | 92 | list_add_tail(&msg->list, &rpci->pipe); |
| 90 | rpci->pipelen += msg->len; | 93 | rpci->pipelen += msg->len; |
| 91 | } else | 94 | res = 0; |
| 92 | res = -EPIPE; | 95 | } |
| 96 | out: | ||
| 93 | up(&inode->i_sem); | 97 | up(&inode->i_sem); |
| 94 | wake_up(&rpci->waitq); | 98 | wake_up(&rpci->waitq); |
| 95 | return res; | 99 | return res; |
| 96 | } | 100 | } |
| 97 | 101 | ||
| 102 | static inline void | ||
| 103 | rpc_inode_setowner(struct inode *inode, void *private) | ||
| 104 | { | ||
| 105 | RPC_I(inode)->private = private; | ||
| 106 | } | ||
| 107 | |||
| 98 | static void | 108 | static void |
| 99 | rpc_close_pipes(struct inode *inode) | 109 | rpc_close_pipes(struct inode *inode) |
| 100 | { | 110 | { |
| @@ -111,15 +121,10 @@ rpc_close_pipes(struct inode *inode) | |||
| 111 | rpci->ops->release_pipe(inode); | 121 | rpci->ops->release_pipe(inode); |
| 112 | rpci->ops = NULL; | 122 | rpci->ops = NULL; |
| 113 | } | 123 | } |
| 124 | rpc_inode_setowner(inode, NULL); | ||
| 114 | up(&inode->i_sem); | 125 | up(&inode->i_sem); |
| 115 | } | 126 | } |
| 116 | 127 | ||
| 117 | static inline void | ||
| 118 | rpc_inode_setowner(struct inode *inode, void *private) | ||
| 119 | { | ||
| 120 | RPC_I(inode)->private = private; | ||
| 121 | } | ||
| 122 | |||
| 123 | static struct inode * | 128 | static struct inode * |
| 124 | rpc_alloc_inode(struct super_block *sb) | 129 | rpc_alloc_inode(struct super_block *sb) |
| 125 | { | 130 | { |
| @@ -501,7 +506,6 @@ repeat: | |||
| 501 | dentry = dvec[--n]; | 506 | dentry = dvec[--n]; |
| 502 | if (dentry->d_inode) { | 507 | if (dentry->d_inode) { |
| 503 | rpc_close_pipes(dentry->d_inode); | 508 | rpc_close_pipes(dentry->d_inode); |
| 504 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
| 505 | simple_unlink(dir, dentry); | 509 | simple_unlink(dir, dentry); |
| 506 | } | 510 | } |
| 507 | dput(dentry); | 511 | dput(dentry); |
| @@ -576,10 +580,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 576 | int error; | 580 | int error; |
| 577 | 581 | ||
| 578 | shrink_dcache_parent(dentry); | 582 | shrink_dcache_parent(dentry); |
| 579 | if (dentry->d_inode) { | 583 | if (dentry->d_inode) |
| 580 | rpc_close_pipes(dentry->d_inode); | 584 | rpc_close_pipes(dentry->d_inode); |
| 581 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
| 582 | } | ||
| 583 | if ((error = simple_rmdir(dir, dentry)) != 0) | 585 | if ((error = simple_rmdir(dir, dentry)) != 0) |
| 584 | return error; | 586 | return error; |
| 585 | if (!error) { | 587 | if (!error) { |
| @@ -732,7 +734,6 @@ rpc_unlink(char *path) | |||
| 732 | d_drop(dentry); | 734 | d_drop(dentry); |
| 733 | if (dentry->d_inode) { | 735 | if (dentry->d_inode) { |
| 734 | rpc_close_pipes(dentry->d_inode); | 736 | rpc_close_pipes(dentry->d_inode); |
| 735 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
| 736 | error = simple_unlink(dir, dentry); | 737 | error = simple_unlink(dir, dentry); |
| 737 | } | 738 | } |
| 738 | dput(dentry); | 739 | dput(dentry); |
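rpc_queue_upcall() now fails cleanly with -EPIPE once the pipe's ops are gone, instead of racing rpc_close_pipes(). A sketch of a producer; on error the message was never queued, so the caller still owns and must reclaim it (the errno field use mirrors existing upcall producers):

	static int send_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
	{
		int err = rpc_queue_upcall(inode, msg);

		if (err < 0)
			msg->errno = err;	/* caller frees/recycles msg */
		return err;
	}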
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c new file mode 100644 index 000000000000..8f97e90f36c8 --- /dev/null +++ b/net/sunrpc/socklib.c | |||
| @@ -0,0 +1,175 @@ | |||
| 1 | /* | ||
| 2 | * linux/net/sunrpc/socklib.c | ||
| 3 | * | ||
| 4 | * Common socket helper routines for RPC client and server | ||
| 5 | * | ||
| 6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/types.h> | ||
| 10 | #include <linux/pagemap.h> | ||
| 11 | #include <linux/udp.h> | ||
| 12 | #include <linux/sunrpc/xdr.h> | ||
| 13 | |||
| 14 | |||
| 15 | /** | ||
| 16 | * skb_read_bits - copy some data bits from skb to internal buffer | ||
| 17 | * @desc: sk_buff copy helper | ||
| 18 | * @to: copy destination | ||
| 19 | * @len: number of bytes to copy | ||
| 20 | * | ||
| 21 | * Possibly called several times to iterate over an sk_buff and copy | ||
| 22 | * data out of it. | ||
| 23 | */ | ||
| 24 | static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) | ||
| 25 | { | ||
| 26 | if (len > desc->count) | ||
| 27 | len = desc->count; | ||
| 28 | if (skb_copy_bits(desc->skb, desc->offset, to, len)) | ||
| 29 | return 0; | ||
| 30 | desc->count -= len; | ||
| 31 | desc->offset += len; | ||
| 32 | return len; | ||
| 33 | } | ||
| 34 | |||
| 35 | /** | ||
| 36 | * skb_read_and_csum_bits - copy and checksum from skb to buffer | ||
| 37 | * @desc: sk_buff copy helper | ||
| 38 | * @to: copy destination | ||
| 39 | * @len: number of bytes to copy | ||
| 40 | * | ||
| 41 | * Same as skb_read_bits, but calculates a checksum at the same time. | ||
| 42 | */ | ||
| 43 | static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) | ||
| 44 | { | ||
| 45 | unsigned int csum2, pos; | ||
| 46 | |||
| 47 | if (len > desc->count) | ||
| 48 | len = desc->count; | ||
| 49 | pos = desc->offset; | ||
| 50 | csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); | ||
| 51 | desc->csum = csum_block_add(desc->csum, csum2, pos); | ||
| 52 | desc->count -= len; | ||
| 53 | desc->offset += len; | ||
| 54 | return len; | ||
| 55 | } | ||
| 56 | |||
| 57 | /** | ||
| 58 | * xdr_partial_copy_from_skb - copy data out of an skb | ||
| 59 | * @xdr: target XDR buffer | ||
| 60 | * @base: starting offset | ||
| 61 | * @desc: sk_buff copy helper | ||
| 62 | * @copy_actor: virtual method for copying data | ||
| 63 | * | ||
| 64 | */ | ||
| 65 | ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) | ||
| 66 | { | ||
| 67 | struct page **ppage = xdr->pages; | ||
| 68 | unsigned int len, pglen = xdr->page_len; | ||
| 69 | ssize_t copied = 0; | ||
| 70 | int ret; | ||
| 71 | |||
| 72 | len = xdr->head[0].iov_len; | ||
| 73 | if (base < len) { | ||
| 74 | len -= base; | ||
| 75 | ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); | ||
| 76 | copied += ret; | ||
| 77 | if (ret != len || !desc->count) | ||
| 78 | goto out; | ||
| 79 | base = 0; | ||
| 80 | } else | ||
| 81 | base -= len; | ||
| 82 | |||
| 83 | if (unlikely(pglen == 0)) | ||
| 84 | goto copy_tail; | ||
| 85 | if (unlikely(base >= pglen)) { | ||
| 86 | base -= pglen; | ||
| 87 | goto copy_tail; | ||
| 88 | } | ||
| 89 | if (base || xdr->page_base) { | ||
| 90 | pglen -= base; | ||
| 91 | base += xdr->page_base; | ||
| 92 | ppage += base >> PAGE_CACHE_SHIFT; | ||
| 93 | base &= ~PAGE_CACHE_MASK; | ||
| 94 | } | ||
| 95 | do { | ||
| 96 | char *kaddr; | ||
| 97 | |||
| 98 | /* ACL likes to be lazy in allocating pages - ACLs | ||
| 99 | * are small by default but can get huge. */ | ||
| 100 | if (unlikely(*ppage == NULL)) { | ||
| 101 | *ppage = alloc_page(GFP_ATOMIC); | ||
| 102 | if (unlikely(*ppage == NULL)) { | ||
| 103 | if (copied == 0) | ||
| 104 | copied = -ENOMEM; | ||
| 105 | goto out; | ||
| 106 | } | ||
| 107 | } | ||
| 108 | |||
| 109 | len = PAGE_CACHE_SIZE; | ||
| 110 | kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); | ||
| 111 | if (base) { | ||
| 112 | len -= base; | ||
| 113 | if (pglen < len) | ||
| 114 | len = pglen; | ||
| 115 | ret = copy_actor(desc, kaddr + base, len); | ||
| 116 | base = 0; | ||
| 117 | } else { | ||
| 118 | if (pglen < len) | ||
| 119 | len = pglen; | ||
| 120 | ret = copy_actor(desc, kaddr, len); | ||
| 121 | } | ||
| 122 | flush_dcache_page(*ppage); | ||
| 123 | kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); | ||
| 124 | copied += ret; | ||
| 125 | if (ret != len || !desc->count) | ||
| 126 | goto out; | ||
| 127 | ppage++; | ||
| 128 | } while ((pglen -= len) != 0); | ||
| 129 | copy_tail: | ||
| 130 | len = xdr->tail[0].iov_len; | ||
| 131 | if (base < len) | ||
| 132 | copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); | ||
| 133 | out: | ||
| 134 | return copied; | ||
| 135 | } | ||
| 136 | |||
| 137 | /** | ||
| 138 | * csum_partial_copy_to_xdr - checksum and copy data | ||
| 139 | * @xdr: target XDR buffer | ||
| 140 | * @skb: source skb | ||
| 141 | * | ||
| 142 | * We have set things up such that we perform the checksum of the UDP | ||
| 143 | * packet in parallel with the copies into the RPC client iovec. -DaveM | ||
| 144 | */ | ||
| 145 | int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | ||
| 146 | { | ||
| 147 | skb_reader_t desc; | ||
| 148 | |||
| 149 | desc.skb = skb; | ||
| 150 | desc.offset = sizeof(struct udphdr); | ||
| 151 | desc.count = skb->len - desc.offset; | ||
| 152 | |||
| 153 | if (skb->ip_summed == CHECKSUM_UNNECESSARY) | ||
| 154 | goto no_checksum; | ||
| 155 | |||
| 156 | desc.csum = csum_partial(skb->data, desc.offset, skb->csum); | ||
| 157 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) | ||
| 158 | return -1; | ||
| 159 | if (desc.offset != skb->len) { | ||
| 160 | unsigned int csum2; | ||
| 161 | csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); | ||
| 162 | desc.csum = csum_block_add(desc.csum, csum2, desc.offset); | ||
| 163 | } | ||
| 164 | if (desc.count) | ||
| 165 | return -1; | ||
| 166 | if ((unsigned short)csum_fold(desc.csum)) | ||
| 167 | return -1; | ||
| 168 | return 0; | ||
| 169 | no_checksum: | ||
| 170 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) | ||
| 171 | return -1; | ||
| 172 | if (desc.count) | ||
| 173 | return -1; | ||
| 174 | return 0; | ||
| 175 | } | ||
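A minimal sketch of how a datagram receive path is expected to consume this helper: checksum folding and the copy into the request's receive buffer happen in a single pass, and a nonzero return covers both a short datagram and a checksum mismatch. The wrapper and its error code are illustrative:

	static int copy_udp_reply(struct rpc_rqst *req, struct sk_buff *skb)
	{
		if (csum_partial_copy_to_xdr(&req->rq_rcv_buf, skb))
			return -EINVAL;
		return 0;
	}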
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index ed48ff022d35..2387e7b823ff 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
| 11 | 11 | ||
| 12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
| 13 | #include <linux/socket.h> | ||
| 14 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
| 15 | #include <linux/uio.h> | 14 | #include <linux/uio.h> |
| 16 | #include <linux/unistd.h> | 15 | #include <linux/unistd.h> |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 30ec3efc48a6..f16e7cdd6150 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
| @@ -548,9 +548,6 @@ svc_write_space(struct sock *sk) | |||
| 548 | /* | 548 | /* |
| 549 | * Receive a datagram from a UDP socket. | 549 | * Receive a datagram from a UDP socket. |
| 550 | */ | 550 | */ |
| 551 | extern int | ||
| 552 | csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb); | ||
| 553 | |||
| 554 | static int | 551 | static int |
| 555 | svc_udp_recvfrom(struct svc_rqst *rqstp) | 552 | svc_udp_recvfrom(struct svc_rqst *rqstp) |
| 556 | { | 553 | { |
| @@ -587,7 +584,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) | |||
| 587 | struct timeval tv; | 584 | struct timeval tv; |
| 588 | 585 | ||
| 589 | tv.tv_sec = xtime.tv_sec; | 586 | tv.tv_sec = xtime.tv_sec; |
| 590 | tv.tv_usec = xtime.tv_nsec * 1000; | 587 | tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; |
| 591 | skb_set_timestamp(skb, &tv); | 588 | skb_set_timestamp(skb, &tv); |
| 592 | /* Don't enable netstamp, sunrpc doesn't | 589 | /* Don't enable netstamp, sunrpc doesn't |
| 593 | need that much accuracy */ | 590 | need that much accuracy */ |
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 1b9616a12e24..d0c9f460e411 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c | |||
| @@ -119,8 +119,18 @@ done: | |||
| 119 | return 0; | 119 | return 0; |
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
| 123 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
| 124 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; | ||
| 125 | EXPORT_SYMBOL(xprt_min_resvport); | ||
| 126 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; | ||
| 127 | EXPORT_SYMBOL(xprt_max_resvport); | ||
| 128 | |||
| 129 | |||
| 122 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; | 130 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; |
| 123 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; | 131 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; |
| 132 | static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; | ||
| 133 | static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; | ||
| 124 | 134 | ||
| 125 | static ctl_table debug_table[] = { | 135 | static ctl_table debug_table[] = { |
| 126 | { | 136 | { |
| @@ -177,6 +187,28 @@ static ctl_table debug_table[] = { | |||
| 177 | .extra1 = &min_slot_table_size, | 187 | .extra1 = &min_slot_table_size, |
| 178 | .extra2 = &max_slot_table_size | 188 | .extra2 = &max_slot_table_size |
| 179 | }, | 189 | }, |
| 190 | { | ||
| 191 | .ctl_name = CTL_MIN_RESVPORT, | ||
| 192 | .procname = "min_resvport", | ||
| 193 | .data = &xprt_min_resvport, | ||
| 194 | .maxlen = sizeof(unsigned int), | ||
| 195 | .mode = 0644, | ||
| 196 | .proc_handler = &proc_dointvec_minmax, | ||
| 197 | .strategy = &sysctl_intvec, | ||
| 198 | .extra1 = &xprt_min_resvport_limit, | ||
| 199 | .extra2 = &xprt_max_resvport_limit | ||
| 200 | }, | ||
| 201 | { | ||
| 202 | .ctl_name = CTL_MAX_RESVPORT, | ||
| 203 | .procname = "max_resvport", | ||
| 204 | .data = &xprt_max_resvport, | ||
| 205 | .maxlen = sizeof(unsigned int), | ||
| 206 | .mode = 0644, | ||
| 207 | .proc_handler = &proc_dointvec_minmax, | ||
| 208 | .strategy = &sysctl_intvec, | ||
| 209 | .extra1 = &xprt_min_resvport_limit, | ||
| 210 | .extra2 = &xprt_max_resvport_limit | ||
| 211 | }, | ||
| 180 | { .ctl_name = 0 } | 212 | { .ctl_name = 0 } |
| 181 | }; | 213 | }; |
| 182 | 214 | ||
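min_resvport and max_resvport bound the window a transport searches when it needs a privileged source port. A hedged sketch of that search, top of the range downward; try_bind() is a hypothetical stand-in for the transport's real bind attempt, and the sysctl limits keep the minimum at RPC_MIN_RESVPORT or above so the loop terminates:

	static int bind_reserved_port(struct rpc_xprt *xprt)
	{
		unsigned int port;

		for (port = xprt_max_resvport; port >= xprt_min_resvport; port--)
			if (try_bind(xprt, port) == 0)	/* hypothetical helper */
				return 0;
		return -EADDRINUSE;	/* whole window in use */
	}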
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index fde16f40a581..32df43372ee9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
| @@ -6,15 +6,12 @@ | |||
| 6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | 6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #include <linux/module.h> | ||
| 9 | #include <linux/types.h> | 10 | #include <linux/types.h> |
| 10 | #include <linux/socket.h> | ||
| 11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
| 12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
| 13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
| 14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
| 15 | #include <linux/in.h> | ||
| 16 | #include <linux/net.h> | ||
| 17 | #include <net/sock.h> | ||
| 18 | #include <linux/sunrpc/xdr.h> | 15 | #include <linux/sunrpc/xdr.h> |
| 19 | #include <linux/sunrpc/msg_prot.h> | 16 | #include <linux/sunrpc/msg_prot.h> |
| 20 | 17 | ||
| @@ -176,178 +173,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, | |||
| 176 | xdr->buflen += len; | 173 | xdr->buflen += len; |
| 177 | } | 174 | } |
| 178 | 175 | ||
| 179 | ssize_t | ||
| 180 | xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, | ||
| 181 | skb_reader_t *desc, | ||
| 182 | skb_read_actor_t copy_actor) | ||
| 183 | { | ||
| 184 | struct page **ppage = xdr->pages; | ||
| 185 | unsigned int len, pglen = xdr->page_len; | ||
| 186 | ssize_t copied = 0; | ||
| 187 | int ret; | ||
| 188 | |||
| 189 | len = xdr->head[0].iov_len; | ||
| 190 | if (base < len) { | ||
| 191 | len -= base; | ||
| 192 | ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); | ||
| 193 | copied += ret; | ||
| 194 | if (ret != len || !desc->count) | ||
| 195 | goto out; | ||
| 196 | base = 0; | ||
| 197 | } else | ||
| 198 | base -= len; | ||
| 199 | |||
| 200 | if (pglen == 0) | ||
| 201 | goto copy_tail; | ||
| 202 | if (base >= pglen) { | ||
| 203 | base -= pglen; | ||
| 204 | goto copy_tail; | ||
| 205 | } | ||
| 206 | if (base || xdr->page_base) { | ||
| 207 | pglen -= base; | ||
| 208 | base += xdr->page_base; | ||
| 209 | ppage += base >> PAGE_CACHE_SHIFT; | ||
| 210 | base &= ~PAGE_CACHE_MASK; | ||
| 211 | } | ||
| 212 | do { | ||
| 213 | char *kaddr; | ||
| 214 | |||
| 215 | /* ACL likes to be lazy in allocating pages - ACLs | ||
| 216 | * are small by default but can get huge. */ | ||
| 217 | if (unlikely(*ppage == NULL)) { | ||
| 218 | *ppage = alloc_page(GFP_ATOMIC); | ||
| 219 | if (unlikely(*ppage == NULL)) { | ||
| 220 | if (copied == 0) | ||
| 221 | copied = -ENOMEM; | ||
| 222 | goto out; | ||
| 223 | } | ||
| 224 | } | ||
| 225 | |||
| 226 | len = PAGE_CACHE_SIZE; | ||
| 227 | kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); | ||
| 228 | if (base) { | ||
| 229 | len -= base; | ||
| 230 | if (pglen < len) | ||
| 231 | len = pglen; | ||
| 232 | ret = copy_actor(desc, kaddr + base, len); | ||
| 233 | base = 0; | ||
| 234 | } else { | ||
| 235 | if (pglen < len) | ||
| 236 | len = pglen; | ||
| 237 | ret = copy_actor(desc, kaddr, len); | ||
| 238 | } | ||
| 239 | flush_dcache_page(*ppage); | ||
| 240 | kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); | ||
| 241 | copied += ret; | ||
| 242 | if (ret != len || !desc->count) | ||
| 243 | goto out; | ||
| 244 | ppage++; | ||
| 245 | } while ((pglen -= len) != 0); | ||
| 246 | copy_tail: | ||
| 247 | len = xdr->tail[0].iov_len; | ||
| 248 | if (base < len) | ||
| 249 | copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); | ||
| 250 | out: | ||
| 251 | return copied; | ||
| 252 | } | ||
| 253 | |||
| 254 | |||
| 255 | int | ||
| 256 | xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, | ||
| 257 | struct xdr_buf *xdr, unsigned int base, int msgflags) | ||
| 258 | { | ||
| 259 | struct page **ppage = xdr->pages; | ||
| 260 | unsigned int len, pglen = xdr->page_len; | ||
| 261 | int err, ret = 0; | ||
| 262 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
| 263 | |||
| 264 | len = xdr->head[0].iov_len; | ||
| 265 | if (base < len || (addr != NULL && base == 0)) { | ||
| 266 | struct kvec iov = { | ||
| 267 | .iov_base = xdr->head[0].iov_base + base, | ||
| 268 | .iov_len = len - base, | ||
| 269 | }; | ||
| 270 | struct msghdr msg = { | ||
| 271 | .msg_name = addr, | ||
| 272 | .msg_namelen = addrlen, | ||
| 273 | .msg_flags = msgflags, | ||
| 274 | }; | ||
| 275 | if (xdr->len > len) | ||
| 276 | msg.msg_flags |= MSG_MORE; | ||
| 277 | |||
| 278 | if (iov.iov_len != 0) | ||
| 279 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
| 280 | else | ||
| 281 | err = kernel_sendmsg(sock, &msg, NULL, 0, 0); | ||
| 282 | if (ret == 0) | ||
| 283 | ret = err; | ||
| 284 | else if (err > 0) | ||
| 285 | ret += err; | ||
| 286 | if (err != iov.iov_len) | ||
| 287 | goto out; | ||
| 288 | base = 0; | ||
| 289 | } else | ||
| 290 | base -= len; | ||
| 291 | |||
| 292 | if (pglen == 0) | ||
| 293 | goto copy_tail; | ||
| 294 | if (base >= pglen) { | ||
| 295 | base -= pglen; | ||
| 296 | goto copy_tail; | ||
| 297 | } | ||
| 298 | if (base || xdr->page_base) { | ||
| 299 | pglen -= base; | ||
| 300 | base += xdr->page_base; | ||
| 301 | ppage += base >> PAGE_CACHE_SHIFT; | ||
| 302 | base &= ~PAGE_CACHE_MASK; | ||
| 303 | } | ||
| 304 | |||
| 305 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
| 306 | do { | ||
| 307 | int flags = msgflags; | ||
| 308 | |||
| 309 | len = PAGE_CACHE_SIZE; | ||
| 310 | if (base) | ||
| 311 | len -= base; | ||
| 312 | if (pglen < len) | ||
| 313 | len = pglen; | ||
| 314 | |||
| 315 | if (pglen != len || xdr->tail[0].iov_len != 0) | ||
| 316 | flags |= MSG_MORE; | ||
| 317 | |||
| 318 | /* Hmm... We might be dealing with highmem pages */ | ||
| 319 | if (PageHighMem(*ppage)) | ||
| 320 | sendpage = sock_no_sendpage; | ||
| 321 | err = sendpage(sock, *ppage, base, len, flags); | ||
| 322 | if (ret == 0) | ||
| 323 | ret = err; | ||
| 324 | else if (err > 0) | ||
| 325 | ret += err; | ||
| 326 | if (err != len) | ||
| 327 | goto out; | ||
| 328 | base = 0; | ||
| 329 | ppage++; | ||
| 330 | } while ((pglen -= len) != 0); | ||
| 331 | copy_tail: | ||
| 332 | len = xdr->tail[0].iov_len; | ||
| 333 | if (base < len) { | ||
| 334 | struct kvec iov = { | ||
| 335 | .iov_base = xdr->tail[0].iov_base + base, | ||
| 336 | .iov_len = len - base, | ||
| 337 | }; | ||
| 338 | struct msghdr msg = { | ||
| 339 | .msg_flags = msgflags, | ||
| 340 | }; | ||
| 341 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
| 342 | if (ret == 0) | ||
| 343 | ret = err; | ||
| 344 | else if (err > 0) | ||
| 345 | ret += err; | ||
| 346 | } | ||
| 347 | out: | ||
| 348 | return ret; | ||
| 349 | } | ||
| 350 | |||
| 351 | 176 | ||
| 352 | /* | 177 | /* |
| 353 | * Helper routines for doing 'memmove' like operations on a struct xdr_buf | 178 | * Helper routines for doing 'memmove' like operations on a struct xdr_buf |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3c654e06b084..6dda3860351f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
| @@ -10,12 +10,12 @@ | |||
| 10 | * one is available. Otherwise, it sleeps on the backlog queue | 10 | * one is available. Otherwise, it sleeps on the backlog queue |
| 11 | * (xprt_reserve). | 11 | * (xprt_reserve). |
| 12 | * - Next, the caller puts together the RPC message, stuffs it into | 12 | * - Next, the caller puts together the RPC message, stuffs it into |
| 13 | * the request struct, and calls xprt_call(). | 13 | * the request struct, and calls xprt_transmit(). |
| 14 | * - xprt_call transmits the message and installs the caller on the | 14 | * - xprt_transmit sends the message and installs the caller on the |
| 15 | * socket's wait list. At the same time, it installs a timer that | 15 | * transport's wait list. At the same time, it installs a timer that |
| 16 | * is run after the packet's timeout has expired. | 16 | * is run after the packet's timeout has expired. |
| 17 | * - When a packet arrives, the data_ready handler walks the list of | 17 | * - When a packet arrives, the data_ready handler walks the list of |
| 18 | * pending requests for that socket. If a matching XID is found, the | 18 | * pending requests for that transport. If a matching XID is found, the |
| 19 | * caller is woken up, and the timer removed. | 19 | * caller is woken up, and the timer removed. |
| 20 | * - When no reply arrives within the timeout interval, the timer is | 20 | * - When no reply arrives within the timeout interval, the timer is |
| 21 | * fired by the kernel and runs xprt_timer(). It either adjusts the | 21 | * fired by the kernel and runs xprt_timer(). It either adjusts the |
| @@ -33,36 +33,17 @@ | |||
| 33 | * | 33 | * |
| 34 | * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> | 34 | * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> |
| 35 | * | 35 | * |
| 36 | * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> | 36 | * Transport switch API copyright (C) 2005, Chuck Lever <cel@netapp.com> |
| 37 | * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
| 38 | * TCP NFS related read + write fixes | ||
| 39 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
| 40 | * | ||
| 41 | * Rewrite of large parts of the code in order to stabilize TCP stuff. | ||
| 42 | * Fix behaviour when socket buffer is full. | ||
| 43 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
| 44 | */ | 37 | */ |
| 45 | 38 | ||
| 39 | #include <linux/module.h> | ||
| 40 | |||
| 46 | #include <linux/types.h> | 41 | #include <linux/types.h> |
| 47 | #include <linux/slab.h> | 42 | #include <linux/interrupt.h> |
| 48 | #include <linux/capability.h> | ||
| 49 | #include <linux/sched.h> | ||
| 50 | #include <linux/errno.h> | ||
| 51 | #include <linux/socket.h> | ||
| 52 | #include <linux/in.h> | ||
| 53 | #include <linux/net.h> | ||
| 54 | #include <linux/mm.h> | ||
| 55 | #include <linux/udp.h> | ||
| 56 | #include <linux/tcp.h> | ||
| 57 | #include <linux/sunrpc/clnt.h> | ||
| 58 | #include <linux/file.h> | ||
| 59 | #include <linux/workqueue.h> | 43 | #include <linux/workqueue.h> |
| 60 | #include <linux/random.h> | 44 | #include <linux/random.h> |
| 61 | 45 | ||
| 62 | #include <net/sock.h> | 46 | #include <linux/sunrpc/clnt.h> |
| 63 | #include <net/checksum.h> | ||
| 64 | #include <net/udp.h> | ||
| 65 | #include <net/tcp.h> | ||
| 66 | 47 | ||
| 67 | /* | 48 | /* |
| 68 | * Local variables | 49 | * Local variables |
| @@ -73,81 +54,90 @@ | |||
| 73 | # define RPCDBG_FACILITY RPCDBG_XPRT | 54 | # define RPCDBG_FACILITY RPCDBG_XPRT |
| 74 | #endif | 55 | #endif |
| 75 | 56 | ||
| 76 | #define XPRT_MAX_BACKOFF (8) | ||
| 77 | #define XPRT_IDLE_TIMEOUT (5*60*HZ) | ||
| 78 | #define XPRT_MAX_RESVPORT (800) | ||
| 79 | |||
| 80 | /* | 57 | /* |
| 81 | * Local functions | 58 | * Local functions |
| 82 | */ | 59 | */ |
| 83 | static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); | 60 | static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); |
| 84 | static inline void do_xprt_reserve(struct rpc_task *); | 61 | static inline void do_xprt_reserve(struct rpc_task *); |
| 85 | static void xprt_disconnect(struct rpc_xprt *); | ||
| 86 | static void xprt_connect_status(struct rpc_task *task); | 62 | static void xprt_connect_status(struct rpc_task *task); |
| 87 | static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, | ||
| 88 | struct rpc_timeout *to); | ||
| 89 | static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); | ||
| 90 | static void xprt_bind_socket(struct rpc_xprt *, struct socket *); | ||
| 91 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); | 63 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); |
| 92 | 64 | ||
| 93 | static int xprt_clear_backlog(struct rpc_xprt *xprt); | ||
| 94 | |||
| 95 | #ifdef RPC_DEBUG_DATA | ||
| 96 | /* | 65 | /* |
| 97 | * Print the buffer contents (first 128 bytes only--just enough for | 66 | * The transport code maintains an estimate on the maximum number of out- |
| 98 | * diropres return). | 67 | * standing RPC requests, using a smoothed version of the congestion |
| 68 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
| 69 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
| 70 | * halved; otherwise, it is incremented by 1/cwnd when | ||
| 71 | * | ||
| 72 | * - a reply is received and | ||
| 73 | * - a full number of requests are outstanding and | ||
| 74 | * - the congestion window hasn't been updated recently. | ||
| 99 | */ | 75 | */ |
| 100 | static void | 76 | #define RPC_CWNDSHIFT (8U) |
| 101 | xprt_pktdump(char *msg, u32 *packet, unsigned int count) | 77 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) |
| 102 | { | 78 | #define RPC_INITCWND RPC_CWNDSCALE |
| 103 | u8 *buf = (u8 *) packet; | 79 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) |
| 104 | int j; | ||
| 105 | |||
| 106 | dprintk("RPC: %s\n", msg); | ||
| 107 | for (j = 0; j < count && j < 128; j += 4) { | ||
| 108 | if (!(j & 31)) { | ||
| 109 | if (j) | ||
| 110 | dprintk("\n"); | ||
| 111 | dprintk("0x%04x ", j); | ||
| 112 | } | ||
| 113 | dprintk("%02x%02x%02x%02x ", | ||
| 114 | buf[j], buf[j+1], buf[j+2], buf[j+3]); | ||
| 115 | } | ||
| 116 | dprintk("\n"); | ||
| 117 | } | ||
| 118 | #else | ||
| 119 | static inline void | ||
| 120 | xprt_pktdump(char *msg, u32 *packet, unsigned int count) | ||
| 121 | { | ||
| 122 | /* NOP */ | ||
| 123 | } | ||
| 124 | #endif | ||
| 125 | 80 | ||
| 126 | /* | 81 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) |
| 127 | * Look up RPC transport given an INET socket | 82 | |
| 83 | /** | ||
| 84 | * xprt_reserve_xprt - serialize write access to transports | ||
| 85 | * @task: task that is requesting access to the transport | ||
| 86 | * | ||
| 87 | * This prevents mixing the payload of separate requests, and prevents | ||
| 88 | * transport connects from colliding with writes. No congestion control | ||
| 89 | * is provided. | ||
| 128 | */ | 90 | */ |
| 129 | static inline struct rpc_xprt * | 91 | int xprt_reserve_xprt(struct rpc_task *task) |
| 130 | xprt_from_sock(struct sock *sk) | ||
| 131 | { | 92 | { |
| 132 | return (struct rpc_xprt *) sk->sk_user_data; | 93 | struct rpc_xprt *xprt = task->tk_xprt; |
| 94 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 95 | |||
| 96 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { | ||
| 97 | if (task == xprt->snd_task) | ||
| 98 | return 1; | ||
| 99 | if (task == NULL) | ||
| 100 | return 0; | ||
| 101 | goto out_sleep; | ||
| 102 | } | ||
| 103 | xprt->snd_task = task; | ||
| 104 | if (req) { | ||
| 105 | req->rq_bytes_sent = 0; | ||
| 106 | req->rq_ntrans++; | ||
| 107 | } | ||
| 108 | return 1; | ||
| 109 | |||
| 110 | out_sleep: | ||
| 111 | dprintk("RPC: %4d failed to lock transport %p\n", | ||
| 112 | task->tk_pid, xprt); | ||
| 113 | task->tk_timeout = 0; | ||
| 114 | task->tk_status = -EAGAIN; | ||
| 115 | if (req && req->rq_ntrans) | ||
| 116 | rpc_sleep_on(&xprt->resend, task, NULL, NULL); | ||
| 117 | else | ||
| 118 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | ||
| 119 | return 0; | ||
| 133 | } | 120 | } |
| 134 | 121 | ||
| 135 | /* | 122 | /* |
| 136 | * Serialize write access to sockets, in order to prevent different | 123 | * xprt_reserve_xprt_cong - serialize write access to transports |
| 137 | * requests from interfering with each other. | 124 | * @task: task that is requesting access to the transport |
| 138 | * Also prevents TCP socket connects from colliding with writes. | 125 | * |
| 126 | * Same as xprt_reserve_xprt, but Van Jacobson congestion control is | ||
| 127 | * integrated into the decision of whether a request is allowed to be | ||
| 128 | * woken up and given access to the transport. | ||
| 139 | */ | 129 | */ |
| 140 | static int | 130 | int xprt_reserve_xprt_cong(struct rpc_task *task) |
| 141 | __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
| 142 | { | 131 | { |
| 132 | struct rpc_xprt *xprt = task->tk_xprt; | ||
| 143 | struct rpc_rqst *req = task->tk_rqstp; | 133 | struct rpc_rqst *req = task->tk_rqstp; |
| 144 | 134 | ||
| 145 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { | 135 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { |
| 146 | if (task == xprt->snd_task) | 136 | if (task == xprt->snd_task) |
| 147 | return 1; | 137 | return 1; |
| 148 | goto out_sleep; | 138 | goto out_sleep; |
| 149 | } | 139 | } |
| 150 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | 140 | if (__xprt_get_cong(xprt, task)) { |
| 151 | xprt->snd_task = task; | 141 | xprt->snd_task = task; |
| 152 | if (req) { | 142 | if (req) { |
| 153 | req->rq_bytes_sent = 0; | 143 | req->rq_bytes_sent = 0; |
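The fixed-point arithmetic behind that congestion comment is worth spelling out: cwnd is kept scaled by RPC_CWNDSCALE (256), so "grow by 1/cwnd per reply" becomes plain integer math. A hedged sketch of the update rule; the real helper's rounding and guard conditions may differ:

	static void adjust_cwnd_sketch(struct rpc_xprt *xprt, int result)
	{
		unsigned long cwnd = xprt->cwnd;

		if (result >= 0) {
			/* additive increase: +1 request per full window of replies */
			cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE) / cwnd;
			if (cwnd > RPC_MAXCWND(xprt))
				cwnd = RPC_MAXCWND(xprt);
		} else {
			/* multiplicative decrease on retransmit */
			cwnd >>= 1;
			if (cwnd < RPC_CWNDSCALE)
				cwnd = RPC_CWNDSCALE;
		}
		xprt->cwnd = cwnd;
	}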
| @@ -156,10 +146,10 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | |||
| 156 | return 1; | 146 | return 1; |
| 157 | } | 147 | } |
| 158 | smp_mb__before_clear_bit(); | 148 | smp_mb__before_clear_bit(); |
| 159 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 149 | clear_bit(XPRT_LOCKED, &xprt->state); |
| 160 | smp_mb__after_clear_bit(); | 150 | smp_mb__after_clear_bit(); |
| 161 | out_sleep: | 151 | out_sleep: |
| 162 | dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); | 152 | dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); |
| 163 | task->tk_timeout = 0; | 153 | task->tk_timeout = 0; |
| 164 | task->tk_status = -EAGAIN; | 154 | task->tk_status = -EAGAIN; |
| 165 | if (req && req->rq_ntrans) | 155 | if (req && req->rq_ntrans) |
| @@ -169,26 +159,52 @@ out_sleep: | |||
| 169 | return 0; | 159 | return 0; |
| 170 | } | 160 | } |
| 171 | 161 | ||
| 172 | static inline int | 162 | static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) |
| 173 | xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
| 174 | { | 163 | { |
| 175 | int retval; | 164 | int retval; |
| 176 | 165 | ||
| 177 | spin_lock_bh(&xprt->sock_lock); | 166 | spin_lock_bh(&xprt->transport_lock); |
| 178 | retval = __xprt_lock_write(xprt, task); | 167 | retval = xprt->ops->reserve_xprt(task); |
| 179 | spin_unlock_bh(&xprt->sock_lock); | 168 | spin_unlock_bh(&xprt->transport_lock); |
| 180 | return retval; | 169 | return retval; |
| 181 | } | 170 | } |
| 182 | 171 | ||
| 172 | static void __xprt_lock_write_next(struct rpc_xprt *xprt) | ||
| 173 | { | ||
| 174 | struct rpc_task *task; | ||
| 175 | struct rpc_rqst *req; | ||
| 183 | 176 | ||
| 184 | static void | 177 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
| 185 | __xprt_lock_write_next(struct rpc_xprt *xprt) | 178 | return; |
| 179 | |||
| 180 | task = rpc_wake_up_next(&xprt->resend); | ||
| 181 | if (!task) { | ||
| 182 | task = rpc_wake_up_next(&xprt->sending); | ||
| 183 | if (!task) | ||
| 184 | goto out_unlock; | ||
| 185 | } | ||
| 186 | |||
| 187 | req = task->tk_rqstp; | ||
| 188 | xprt->snd_task = task; | ||
| 189 | if (req) { | ||
| 190 | req->rq_bytes_sent = 0; | ||
| 191 | req->rq_ntrans++; | ||
| 192 | } | ||
| 193 | return; | ||
| 194 | |||
| 195 | out_unlock: | ||
| 196 | smp_mb__before_clear_bit(); | ||
| 197 | clear_bit(XPRT_LOCKED, &xprt->state); | ||
| 198 | smp_mb__after_clear_bit(); | ||
| 199 | } | ||
| 200 | |||
| 201 | static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) | ||
| 186 | { | 202 | { |
| 187 | struct rpc_task *task; | 203 | struct rpc_task *task; |
| 188 | 204 | ||
| 189 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) | 205 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
| 190 | return; | 206 | return; |
| 191 | if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) | 207 | if (RPCXPRT_CONGESTED(xprt)) |
| 192 | goto out_unlock; | 208 | goto out_unlock; |
| 193 | task = rpc_wake_up_next(&xprt->resend); | 209 | task = rpc_wake_up_next(&xprt->resend); |
| 194 | if (!task) { | 210 | if (!task) { |
| @@ -196,7 +212,7 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) | |||
| 196 | if (!task) | 212 | if (!task) |
| 197 | goto out_unlock; | 213 | goto out_unlock; |
| 198 | } | 214 | } |
| 199 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | 215 | if (__xprt_get_cong(xprt, task)) { |
| 200 | struct rpc_rqst *req = task->tk_rqstp; | 216 | struct rpc_rqst *req = task->tk_rqstp; |
| 201 | xprt->snd_task = task; | 217 | xprt->snd_task = task; |
| 202 | if (req) { | 218 | if (req) { |
| @@ -207,87 +223,52 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) | |||
| 207 | } | 223 | } |
| 208 | out_unlock: | 224 | out_unlock: |
| 209 | smp_mb__before_clear_bit(); | 225 | smp_mb__before_clear_bit(); |
| 210 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 226 | clear_bit(XPRT_LOCKED, &xprt->state); |
| 211 | smp_mb__after_clear_bit(); | 227 | smp_mb__after_clear_bit(); |
| 212 | } | 228 | } |
| 213 | 229 | ||
| 214 | /* | 230 | /** |
| 215 | * Releases the socket for use by other requests. | 231 | * xprt_release_xprt - allow other requests to use a transport |
| 232 | * @xprt: transport with other tasks potentially waiting | ||
| 233 | * @task: task that is releasing access to the transport | ||
| 234 | * | ||
| 235 | * Note that "task" can be NULL. No congestion control is provided. | ||
| 216 | */ | 236 | */ |
| 217 | static void | 237 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) |
| 218 | __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
| 219 | { | 238 | { |
| 220 | if (xprt->snd_task == task) { | 239 | if (xprt->snd_task == task) { |
| 221 | xprt->snd_task = NULL; | 240 | xprt->snd_task = NULL; |
| 222 | smp_mb__before_clear_bit(); | 241 | smp_mb__before_clear_bit(); |
| 223 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 242 | clear_bit(XPRT_LOCKED, &xprt->state); |
| 224 | smp_mb__after_clear_bit(); | 243 | smp_mb__after_clear_bit(); |
| 225 | __xprt_lock_write_next(xprt); | 244 | __xprt_lock_write_next(xprt); |
| 226 | } | 245 | } |
| 227 | } | 246 | } |
| 228 | 247 | ||
| 229 | static inline void | 248 | /** |
| 230 | xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | 249 | * xprt_release_xprt_cong - allow other requests to use a transport |
| 231 | { | 250 | * @xprt: transport with other tasks potentially waiting |
| 232 | spin_lock_bh(&xprt->sock_lock); | 251 | * @task: task that is releasing access to the transport |
| 233 | __xprt_release_write(xprt, task); | 252 | * |
| 234 | spin_unlock_bh(&xprt->sock_lock); | 253 | * Note that "task" can be NULL. Another task is awoken to use the |
| 235 | } | 254 | * transport if the transport's congestion window allows it. |
| 236 | |||
| 237 | /* | ||
| 238 | * Write data to socket. | ||
| 239 | */ | 255 | */ |
| 240 | static inline int | 256 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) |
| 241 | xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) | ||
| 242 | { | 257 | { |
| 243 | struct socket *sock = xprt->sock; | 258 | if (xprt->snd_task == task) { |
| 244 | struct xdr_buf *xdr = &req->rq_snd_buf; | 259 | xprt->snd_task = NULL; |
| 245 | struct sockaddr *addr = NULL; | 260 | smp_mb__before_clear_bit(); |
| 246 | int addrlen = 0; | 261 | clear_bit(XPRT_LOCKED, &xprt->state); |
| 247 | unsigned int skip; | 262 | smp_mb__after_clear_bit(); |
| 248 | int result; | 263 | __xprt_lock_write_next_cong(xprt); |
| 249 | |||
| 250 | if (!sock) | ||
| 251 | return -ENOTCONN; | ||
| 252 | |||
| 253 | xprt_pktdump("packet data:", | ||
| 254 | req->rq_svec->iov_base, | ||
| 255 | req->rq_svec->iov_len); | ||
| 256 | |||
| 257 | /* For UDP, we need to provide an address */ | ||
| 258 | if (!xprt->stream) { | ||
| 259 | addr = (struct sockaddr *) &xprt->addr; | ||
| 260 | addrlen = sizeof(xprt->addr); | ||
| 261 | } | 264 | } |
| 262 | /* Don't repeat bytes */ | 265 | }
| 263 | skip = req->rq_bytes_sent; | ||
| 264 | |||
| 265 | clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
| 266 | result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); | ||
| 267 | |||
| 268 | dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); | ||
| 269 | |||
| 270 | if (result >= 0) | ||
| 271 | return result; | ||
| 272 | 266 | ||
| 273 | switch (result) { | 267 | static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) |
| 274 | case -ECONNREFUSED: | 268 | { |
| 275 | /* When the server has died, an ICMP port unreachable message | 269 | spin_lock_bh(&xprt->transport_lock); |
| 276 | * prompts ECONNREFUSED. | 270 | xprt->ops->release_xprt(xprt, task); |
| 277 | */ | 271 | spin_unlock_bh(&xprt->transport_lock); |
| 278 | case -EAGAIN: | ||
| 279 | break; | ||
| 280 | case -ECONNRESET: | ||
| 281 | case -ENOTCONN: | ||
| 282 | case -EPIPE: | ||
| 283 | /* connection broken */ | ||
| 284 | if (xprt->stream) | ||
| 285 | result = -ENOTCONN; | ||
| 286 | break; | ||
| 287 | default: | ||
| 288 | printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); | ||
| 289 | } | ||
| 290 | return result; | ||
| 291 | } | 272 | } |
| 292 | 273 | ||
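With reserve/release routed through xprt->ops, each transport picks the variant it needs. A hypothetical wiring, for illustration only (the demo_* variable names are invented; the hook and function names come from this series):

        static struct rpc_xprt_ops demo_udp_ops = {
                .reserve_xprt   = xprt_reserve_xprt_cong,   /* VJ congestion control */
                .release_xprt   = xprt_release_xprt_cong,
        };

        static struct rpc_xprt_ops demo_tcp_ops = {
                .reserve_xprt   = xprt_reserve_xprt,        /* plain serialization */
                .release_xprt   = xprt_release_xprt,
        };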
| 293 | /* | 274 | /* |
| @@ -321,26 +302,40 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) | |||
| 321 | return; | 302 | return; |
| 322 | req->rq_cong = 0; | 303 | req->rq_cong = 0; |
| 323 | xprt->cong -= RPC_CWNDSCALE; | 304 | xprt->cong -= RPC_CWNDSCALE; |
| 324 | __xprt_lock_write_next(xprt); | 305 | __xprt_lock_write_next_cong(xprt); |
| 325 | } | 306 | } |
| 326 | 307 | ||
| 327 | /* | 308 | /** |
| 328 | * Adjust RPC congestion window | 309 | * xprt_release_rqst_cong - housekeeping when request is complete |
| 310 | * @task: RPC request that recently completed | ||
| 311 | * | ||
| 312 | * Useful for transports that require congestion control. | ||
| 313 | */ | ||
| 314 | void xprt_release_rqst_cong(struct rpc_task *task) | ||
| 315 | { | ||
| 316 | __xprt_put_cong(task->tk_xprt, task->tk_rqstp); | ||
| 317 | } | ||
| 318 | |||
| 319 | /** | ||
| 320 | * xprt_adjust_cwnd - adjust transport congestion window | ||
| 321 | * @task: recently completed RPC request used to adjust window | ||
| 322 | * @result: result code of completed RPC request | ||
| 323 | * | ||
| 329 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. | 324 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. |
| 330 | */ | 325 | */ |
| 331 | static void | 326 | void xprt_adjust_cwnd(struct rpc_task *task, int result) |
| 332 | xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) | ||
| 333 | { | 327 | { |
| 334 | unsigned long cwnd; | 328 | struct rpc_rqst *req = task->tk_rqstp; |
| 329 | struct rpc_xprt *xprt = task->tk_xprt; | ||
| 330 | unsigned long cwnd = xprt->cwnd; | ||
| 335 | 331 | ||
| 336 | cwnd = xprt->cwnd; | ||
| 337 | if (result >= 0 && cwnd <= xprt->cong) { | 332 | if (result >= 0 && cwnd <= xprt->cong) { |
| 338 | /* The (cwnd >> 1) term makes sure | 333 | /* The (cwnd >> 1) term makes sure |
| 339 | * the result gets rounded properly. */ | 334 | * the result gets rounded properly. */ |
| 340 | cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; | 335 | cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; |
| 341 | if (cwnd > RPC_MAXCWND(xprt)) | 336 | if (cwnd > RPC_MAXCWND(xprt)) |
| 342 | cwnd = RPC_MAXCWND(xprt); | 337 | cwnd = RPC_MAXCWND(xprt); |
| 343 | __xprt_lock_write_next(xprt); | 338 | __xprt_lock_write_next_cong(xprt); |
| 344 | } else if (result == -ETIMEDOUT) { | 339 | } else if (result == -ETIMEDOUT) { |
| 345 | cwnd >>= 1; | 340 | cwnd >>= 1; |
| 346 | if (cwnd < RPC_CWNDSCALE) | 341 | if (cwnd < RPC_CWNDSCALE) |
| @@ -349,11 +344,89 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) | |||
| 349 | dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", | 344 | dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", |
| 350 | xprt->cong, xprt->cwnd, cwnd); | 345 | xprt->cong, xprt->cwnd, cwnd); |
| 351 | xprt->cwnd = cwnd; | 346 | xprt->cwnd = cwnd; |
| 347 | __xprt_put_cong(xprt, req); | ||
| 348 | } | ||
| 349 | |||
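To make the estimator concrete: assuming RPC_CWNDSCALE is 256 (1 << RPC_CWNDSHIFT, per the sunrpc headers), the window grows by about one request's worth per window of successful replies and halves on a timeout. An illustrative trace, using integer division as in the code above:

        unsigned long cwnd = 512;                       /* window admits two requests */

        cwnd += (256UL * 256 + (cwnd >> 1)) / cwnd;     /* reply:   512 -> 640 */
        cwnd += (256UL * 256 + (cwnd >> 1)) / cwnd;     /* reply:   640 -> 742 */
        cwnd >>= 1;                                     /* timeout: 742 -> 371 */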
| 350 | /** | ||
| 351 | * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue | ||
| 352 | * @xprt: transport with waiting tasks | ||
| 353 | * @status: result code to plant in each task before waking it | ||
| 354 | * | ||
| 355 | */ | ||
| 356 | void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) | ||
| 357 | { | ||
| 358 | if (status < 0) | ||
| 359 | rpc_wake_up_status(&xprt->pending, status); | ||
| 360 | else | ||
| 361 | rpc_wake_up(&xprt->pending); | ||
| 362 | } | ||
| 363 | |||
| 364 | /** | ||
| 365 | * xprt_wait_for_buffer_space - wait for transport output buffer to clear | ||
| 366 | * @task: task to be put to sleep | ||
| 367 | * | ||
| 368 | */ | ||
| 369 | void xprt_wait_for_buffer_space(struct rpc_task *task) | ||
| 370 | { | ||
| 371 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 372 | struct rpc_xprt *xprt = req->rq_xprt; | ||
| 373 | |||
| 374 | task->tk_timeout = req->rq_timeout; | ||
| 375 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); | ||
| 376 | } | ||
| 377 | |||
| 378 | /** | ||
| 379 | * xprt_write_space - wake the task waiting for transport output buffer space | ||
| 380 | * @xprt: transport with waiting tasks | ||
| 381 | * | ||
| 382 | * Can be called in a soft IRQ context, so xprt_write_space never sleeps. | ||
| 383 | */ | ||
| 384 | void xprt_write_space(struct rpc_xprt *xprt) | ||
| 385 | { | ||
| 386 | if (unlikely(xprt->shutdown)) | ||
| 387 | return; | ||
| 388 | |||
| 389 | spin_lock_bh(&xprt->transport_lock); | ||
| 390 | if (xprt->snd_task) { | ||
| 391 | dprintk("RPC: write space: waking waiting task on xprt %p\n", | ||
| 392 | xprt); | ||
| 393 | rpc_wake_up_task(xprt->snd_task); | ||
| 394 | } | ||
| 395 | spin_unlock_bh(&xprt->transport_lock); | ||
| 396 | } | ||
| 397 | |||
| 398 | /** | ||
| 399 | * xprt_set_retrans_timeout_def - set a request's retransmit timeout | ||
| 400 | * @task: task whose timeout is to be set | ||
| 401 | * | ||
| 402 | * Set a request's retransmit timeout based on the transport's | ||
| 403 | * default timeout parameters. Used by transports that don't adjust | ||
| 404 | * the retransmit timeout based on round-trip time estimation. | ||
| 405 | */ | ||
| 406 | void xprt_set_retrans_timeout_def(struct rpc_task *task) | ||
| 407 | { | ||
| 408 | task->tk_timeout = task->tk_rqstp->rq_timeout; | ||
| 352 | } | 409 | } |
| 353 | 410 | ||
| 354 | /* | 411 | /**
| 355 | * Reset the major timeout value | 412 | * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout |
| 413 | * @task: task whose timeout is to be set | ||
| 414 | * | ||
| 415 | * Set a request's retransmit timeout using the RTT estimator. | ||
| 356 | */ | 416 | */ |
| 417 | void xprt_set_retrans_timeout_rtt(struct rpc_task *task) | ||
| 418 | { | ||
| 419 | int timer = task->tk_msg.rpc_proc->p_timer; | ||
| 420 | struct rpc_rtt *rtt = task->tk_client->cl_rtt; | ||
| 421 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 422 | unsigned long max_timeout = req->rq_xprt->timeout.to_maxval; | ||
| 423 | |||
| 424 | task->tk_timeout = rpc_calc_rto(rtt, timer); | ||
| 425 | task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; | ||
| 426 | if (task->tk_timeout > max_timeout || task->tk_timeout == 0) | ||
| 427 | task->tk_timeout = max_timeout; | ||
| 428 | } | ||
| 429 | |||
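For a feel of the numbers in xprt_set_retrans_timeout_rtt: the estimated RTO is doubled once per recorded timeout and per retry already made, then clamped. A sketch with invented values:

        unsigned long rto = msecs_to_jiffies(300);      /* pretend rpc_calc_rto() said 300 ms */
        unsigned long max = msecs_to_jiffies(60000);    /* pretend to_maxval is 60 seconds    */
        int backoff = 2;                                /* rpc_ntimeo() + req->rq_retries     */
        unsigned long timeout = rto << backoff;         /* 300 ms doubled twice: 1.2 s        */

        if (timeout > max || timeout == 0)
                timeout = max;                          /* clamp runaway shifts               */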
| 357 | static void xprt_reset_majortimeo(struct rpc_rqst *req) | 430 | static void xprt_reset_majortimeo(struct rpc_rqst *req) |
| 358 | { | 431 | { |
| 359 | struct rpc_timeout *to = &req->rq_xprt->timeout; | 432 | struct rpc_timeout *to = &req->rq_xprt->timeout; |
| @@ -368,8 +441,10 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req) | |||
| 368 | req->rq_majortimeo += jiffies; | 441 | req->rq_majortimeo += jiffies; |
| 369 | } | 442 | } |
| 370 | 443 | ||
| 371 | /* | 444 | /** |
| 372 | * Adjust timeout values etc for next retransmit | 445 | * xprt_adjust_timeout - adjust timeout values for next retransmit |
| 446 | * @req: RPC request containing parameters to use for the adjustment | ||
| 447 | * | ||
| 373 | */ | 448 | */ |
| 374 | int xprt_adjust_timeout(struct rpc_rqst *req) | 449 | int xprt_adjust_timeout(struct rpc_rqst *req) |
| 375 | { | 450 | { |
| @@ -391,9 +466,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req) | |||
| 391 | req->rq_retries = 0; | 466 | req->rq_retries = 0; |
| 392 | xprt_reset_majortimeo(req); | 467 | xprt_reset_majortimeo(req); |
| 393 | /* Reset the RTT counters == "slow start" */ | 468 | /* Reset the RTT counters == "slow start" */ |
| 394 | spin_lock_bh(&xprt->sock_lock); | 469 | spin_lock_bh(&xprt->transport_lock); |
| 395 | rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); | 470 | rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); |
| 396 | spin_unlock_bh(&xprt->sock_lock); | 471 | spin_unlock_bh(&xprt->transport_lock); |
| 397 | pprintk("RPC: %lu timeout\n", jiffies); | 472 | pprintk("RPC: %lu timeout\n", jiffies); |
| 398 | status = -ETIMEDOUT; | 473 | status = -ETIMEDOUT; |
| 399 | } | 474 | } |
| @@ -405,133 +480,52 @@ int xprt_adjust_timeout(struct rpc_rqst *req) | |||
| 405 | return status; | 480 | return status; |
| 406 | } | 481 | } |
| 407 | 482 | ||
| 408 | /* | 483 | static void xprt_autoclose(void *args) |
| 409 | * Close down a transport socket | ||
| 410 | */ | ||
| 411 | static void | ||
| 412 | xprt_close(struct rpc_xprt *xprt) | ||
| 413 | { | ||
| 414 | struct socket *sock = xprt->sock; | ||
| 415 | struct sock *sk = xprt->inet; | ||
| 416 | |||
| 417 | if (!sk) | ||
| 418 | return; | ||
| 419 | |||
| 420 | write_lock_bh(&sk->sk_callback_lock); | ||
| 421 | xprt->inet = NULL; | ||
| 422 | xprt->sock = NULL; | ||
| 423 | |||
| 424 | sk->sk_user_data = NULL; | ||
| 425 | sk->sk_data_ready = xprt->old_data_ready; | ||
| 426 | sk->sk_state_change = xprt->old_state_change; | ||
| 427 | sk->sk_write_space = xprt->old_write_space; | ||
| 428 | write_unlock_bh(&sk->sk_callback_lock); | ||
| 429 | |||
| 430 | sk->sk_no_check = 0; | ||
| 431 | |||
| 432 | sock_release(sock); | ||
| 433 | } | ||
| 434 | |||
| 435 | static void | ||
| 436 | xprt_socket_autoclose(void *args) | ||
| 437 | { | 484 | { |
| 438 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | 485 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; |
| 439 | 486 | ||
| 440 | xprt_disconnect(xprt); | 487 | xprt_disconnect(xprt); |
| 441 | xprt_close(xprt); | 488 | xprt->ops->close(xprt); |
| 442 | xprt_release_write(xprt, NULL); | 489 | xprt_release_write(xprt, NULL); |
| 443 | } | 490 | } |
| 444 | 491 | ||
| 445 | /* | 492 | /** |
| 446 | * Mark a transport as disconnected | 493 | * xprt_disconnect - mark a transport as disconnected |
| 494 | * @xprt: transport to flag for disconnect | ||
| 495 | * | ||
| 447 | */ | 496 | */ |
| 448 | static void | 497 | void xprt_disconnect(struct rpc_xprt *xprt) |
| 449 | xprt_disconnect(struct rpc_xprt *xprt) | ||
| 450 | { | 498 | { |
| 451 | dprintk("RPC: disconnected transport %p\n", xprt); | 499 | dprintk("RPC: disconnected transport %p\n", xprt); |
| 452 | spin_lock_bh(&xprt->sock_lock); | 500 | spin_lock_bh(&xprt->transport_lock); |
| 453 | xprt_clear_connected(xprt); | 501 | xprt_clear_connected(xprt); |
| 454 | rpc_wake_up_status(&xprt->pending, -ENOTCONN); | 502 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
| 455 | spin_unlock_bh(&xprt->sock_lock); | 503 | spin_unlock_bh(&xprt->transport_lock); |
| 456 | } | 504 | } |
| 457 | 505 | ||
| 458 | /* | ||
| 459 | * Used to allow disconnection when we've been idle | ||
| 460 | */ | ||
| 461 | static void | 506 | static void |
| 462 | xprt_init_autodisconnect(unsigned long data) | 507 | xprt_init_autodisconnect(unsigned long data) |
| 463 | { | 508 | { |
| 464 | struct rpc_xprt *xprt = (struct rpc_xprt *)data; | 509 | struct rpc_xprt *xprt = (struct rpc_xprt *)data; |
| 465 | 510 | ||
| 466 | spin_lock(&xprt->sock_lock); | 511 | spin_lock(&xprt->transport_lock); |
| 467 | if (!list_empty(&xprt->recv) || xprt->shutdown) | 512 | if (!list_empty(&xprt->recv) || xprt->shutdown) |
| 468 | goto out_abort; | 513 | goto out_abort; |
| 469 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) | 514 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
| 470 | goto out_abort; | 515 | goto out_abort; |
| 471 | spin_unlock(&xprt->sock_lock); | 516 | spin_unlock(&xprt->transport_lock); |
| 472 | /* Let keventd close the socket */ | 517 | if (xprt_connecting(xprt)) |
| 473 | if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) | ||
| 474 | xprt_release_write(xprt, NULL); | 518 | xprt_release_write(xprt, NULL); |
| 475 | else | 519 | else |
| 476 | schedule_work(&xprt->task_cleanup); | 520 | schedule_work(&xprt->task_cleanup); |
| 477 | return; | 521 | return; |
| 478 | out_abort: | 522 | out_abort: |
| 479 | spin_unlock(&xprt->sock_lock); | 523 | spin_unlock(&xprt->transport_lock); |
| 480 | } | ||
| 481 | |||
| 482 | static void xprt_socket_connect(void *args) | ||
| 483 | { | ||
| 484 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
| 485 | struct socket *sock = xprt->sock; | ||
| 486 | int status = -EIO; | ||
| 487 | |||
| 488 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
| 489 | goto out; | ||
| 490 | |||
| 491 | /* | ||
| 492 | * Start by resetting any existing state | ||
| 493 | */ | ||
| 494 | xprt_close(xprt); | ||
| 495 | sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); | ||
| 496 | if (sock == NULL) { | ||
| 497 | /* couldn't create socket or bind to reserved port; | ||
| 498 | * this is likely a permanent error, so cause an abort */ | ||
| 499 | goto out; | ||
| 500 | } | ||
| 501 | xprt_bind_socket(xprt, sock); | ||
| 502 | xprt_sock_setbufsize(xprt); | ||
| 503 | |||
| 504 | status = 0; | ||
| 505 | if (!xprt->stream) | ||
| 506 | goto out; | ||
| 507 | |||
| 508 | /* | ||
| 509 | * Tell the socket layer to start connecting... | ||
| 510 | */ | ||
| 511 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | ||
| 512 | sizeof(xprt->addr), O_NONBLOCK); | ||
| 513 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | ||
| 514 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | ||
| 515 | if (status < 0) { | ||
| 516 | switch (status) { | ||
| 517 | case -EINPROGRESS: | ||
| 518 | case -EALREADY: | ||
| 519 | goto out_clear; | ||
| 520 | } | ||
| 521 | } | ||
| 522 | out: | ||
| 523 | if (status < 0) | ||
| 524 | rpc_wake_up_status(&xprt->pending, status); | ||
| 525 | else | ||
| 526 | rpc_wake_up(&xprt->pending); | ||
| 527 | out_clear: | ||
| 528 | smp_mb__before_clear_bit(); | ||
| 529 | clear_bit(XPRT_CONNECTING, &xprt->sockstate); | ||
| 530 | smp_mb__after_clear_bit(); | ||
| 531 | } | 524 | } |
| 532 | 525 | ||
| 533 | /* | 526 | /** |
| 534 | * Attempt to connect a TCP socket. | 527 | * xprt_connect - schedule a transport connect operation |
| 528 | * @task: RPC task that is requesting the connect | ||
| 535 | * | 529 | * |
| 536 | */ | 530 | */ |
| 537 | void xprt_connect(struct rpc_task *task) | 531 | void xprt_connect(struct rpc_task *task) |
| @@ -552,37 +546,19 @@ void xprt_connect(struct rpc_task *task) | |||
| 552 | if (!xprt_lock_write(xprt, task)) | 546 | if (!xprt_lock_write(xprt, task)) |
| 553 | return; | 547 | return; |
| 554 | if (xprt_connected(xprt)) | 548 | if (xprt_connected(xprt)) |
| 555 | goto out_write; | 549 | xprt_release_write(xprt, task); |
| 550 | else { | ||
| 551 | if (task->tk_rqstp) | ||
| 552 | task->tk_rqstp->rq_bytes_sent = 0; | ||
| 556 | 553 | ||
| 557 | if (task->tk_rqstp) | 554 | task->tk_timeout = xprt->connect_timeout; |
| 558 | task->tk_rqstp->rq_bytes_sent = 0; | 555 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); |
| 559 | 556 | xprt->ops->connect(task); | |
| 560 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | ||
| 561 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); | ||
| 562 | if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { | ||
| 563 | /* Note: if we are here due to a dropped connection | ||
| 564 | * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ | ||
| 565 | * seconds | ||
| 566 | */ | ||
| 567 | if (xprt->sock != NULL) | ||
| 568 | schedule_delayed_work(&xprt->sock_connect, | ||
| 569 | RPC_REESTABLISH_TIMEOUT); | ||
| 570 | else { | ||
| 571 | schedule_work(&xprt->sock_connect); | ||
| 572 | if (!RPC_IS_ASYNC(task)) | ||
| 573 | flush_scheduled_work(); | ||
| 574 | } | ||
| 575 | } | 557 | } |
| 576 | return; | 558 | return; |
| 577 | out_write: | ||
| 578 | xprt_release_write(xprt, task); | ||
| 579 | } | 559 | } |
| 580 | 560 | ||
| 581 | /* | 561 | static void xprt_connect_status(struct rpc_task *task) |
| 582 | * We arrive here when awoken from waiting on connection establishment. | ||
| 583 | */ | ||
| 584 | static void | ||
| 585 | xprt_connect_status(struct rpc_task *task) | ||
| 586 | { | 562 | { |
| 587 | struct rpc_xprt *xprt = task->tk_xprt; | 563 | struct rpc_xprt *xprt = task->tk_xprt; |
| 588 | 564 | ||
| @@ -592,31 +568,42 @@ xprt_connect_status(struct rpc_task *task) | |||
| 592 | return; | 568 | return; |
| 593 | } | 569 | } |
| 594 | 570 | ||
| 595 | /* if soft mounted, just cause this RPC to fail */ | ||
| 596 | if (RPC_IS_SOFT(task)) | ||
| 597 | task->tk_status = -EIO; | ||
| 598 | |||
| 599 | switch (task->tk_status) { | 571 | switch (task->tk_status) { |
| 600 | case -ECONNREFUSED: | 572 | case -ECONNREFUSED: |
| 601 | case -ECONNRESET: | 573 | case -ECONNRESET: |
| 574 | dprintk("RPC: %4d xprt_connect_status: server %s refused connection\n", | ||
| 575 | task->tk_pid, task->tk_client->cl_server); | ||
| 576 | break; | ||
| 602 | case -ENOTCONN: | 577 | case -ENOTCONN: |
| 603 | return; | 578 | dprintk("RPC: %4d xprt_connect_status: connection broken\n", |
| 579 | task->tk_pid); | ||
| 580 | break; | ||
| 604 | case -ETIMEDOUT: | 581 | case -ETIMEDOUT: |
| 605 | dprintk("RPC: %4d xprt_connect_status: timed out\n", | 582 | dprintk("RPC: %4d xprt_connect_status: connect attempt timed out\n", |
| 606 | task->tk_pid); | 583 | task->tk_pid); |
| 607 | break; | 584 | break; |
| 608 | default: | 585 | default: |
| 609 | printk(KERN_ERR "RPC: error %d connecting to server %s\n", | 586 | dprintk("RPC: %4d xprt_connect_status: error %d connecting to server %s\n", |
| 610 | -task->tk_status, task->tk_client->cl_server); | 587 | task->tk_pid, -task->tk_status, task->tk_client->cl_server); |
| 588 | xprt_release_write(xprt, task); | ||
| 589 | task->tk_status = -EIO; | ||
| 590 | return; | ||
| 591 | } | ||
| 592 | |||
| 593 | /* if soft mounted, just cause this RPC to fail */ | ||
| 594 | if (RPC_IS_SOFT(task)) { | ||
| 595 | xprt_release_write(xprt, task); | ||
| 596 | task->tk_status = -EIO; | ||
| 611 | } | 597 | } |
| 612 | xprt_release_write(xprt, task); | ||
| 613 | } | 598 | } |
| 614 | 599 | ||
| 615 | /* | 600 | /** |
| 616 | * Look up the RPC request corresponding to a reply, and then lock it. | 601 | * xprt_lookup_rqst - find an RPC request corresponding to an XID |
| 602 | * @xprt: transport on which the original request was transmitted | ||
| 603 | * @xid: RPC XID of incoming reply | ||
| 604 | * | ||
| 617 | */ | 605 | */ |
| 618 | static inline struct rpc_rqst * | 606 | struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) |
| 619 | xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) | ||
| 620 | { | 607 | { |
| 621 | struct list_head *pos; | 608 | struct list_head *pos; |
| 622 | struct rpc_rqst *req = NULL; | 609 | struct rpc_rqst *req = NULL; |
| @@ -631,556 +618,68 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) | |||
| 631 | return req; | 618 | return req; |
| 632 | } | 619 | } |
| 633 | 620 | ||
| 634 | /* | 621 | /** |
| 635 | * Complete reply received. | 622 | * xprt_update_rtt - update an RPC client's RTT state after receiving a reply |
| 636 | * The TCP code relies on us to remove the request from xprt->pending. | 623 | * @task: RPC request that recently completed |
| 637 | */ | 624 | * |
| 638 | static void | ||
| 639 | xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) | ||
| 640 | { | ||
| 641 | struct rpc_task *task = req->rq_task; | ||
| 642 | struct rpc_clnt *clnt = task->tk_client; | ||
| 643 | |||
| 644 | /* Adjust congestion window */ | ||
| 645 | if (!xprt->nocong) { | ||
| 646 | unsigned timer = task->tk_msg.rpc_proc->p_timer; | ||
| 647 | xprt_adjust_cwnd(xprt, copied); | ||
| 648 | __xprt_put_cong(xprt, req); | ||
| 649 | if (timer) { | ||
| 650 | if (req->rq_ntrans == 1) | ||
| 651 | rpc_update_rtt(clnt->cl_rtt, timer, | ||
| 652 | (long)jiffies - req->rq_xtime); | ||
| 653 | rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); | ||
| 654 | } | ||
| 655 | } | ||
| 656 | |||
| 657 | #ifdef RPC_PROFILE | ||
| 658 | /* Profile only reads for now */ | ||
| 659 | if (copied > 1024) { | ||
| 660 | static unsigned long nextstat; | ||
| 661 | static unsigned long pkt_rtt, pkt_len, pkt_cnt; | ||
| 662 | |||
| 663 | pkt_cnt++; | ||
| 664 | pkt_len += req->rq_slen + copied; | ||
| 665 | pkt_rtt += jiffies - req->rq_xtime; | ||
| 666 | if (time_before(nextstat, jiffies)) { | ||
| 667 | printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd); | ||
| 668 | printk("RPC: %ld %ld %ld %ld stat\n", | ||
| 669 | jiffies, pkt_cnt, pkt_len, pkt_rtt); | ||
| 670 | pkt_rtt = pkt_len = pkt_cnt = 0; | ||
| 671 | nextstat = jiffies + 5 * HZ; | ||
| 672 | } | ||
| 673 | } | ||
| 674 | #endif | ||
| 675 | |||
| 676 | dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); | ||
| 677 | list_del_init(&req->rq_list); | ||
| 678 | req->rq_received = req->rq_private_buf.len = copied; | ||
| 679 | |||
| 680 | /* ... and wake up the process. */ | ||
| 681 | rpc_wake_up_task(task); | ||
| 682 | return; | ||
| 683 | } | ||
| 684 | |||
| 685 | static size_t | ||
| 686 | skb_read_bits(skb_reader_t *desc, void *to, size_t len) | ||
| 687 | { | ||
| 688 | if (len > desc->count) | ||
| 689 | len = desc->count; | ||
| 690 | if (skb_copy_bits(desc->skb, desc->offset, to, len)) | ||
| 691 | return 0; | ||
| 692 | desc->count -= len; | ||
| 693 | desc->offset += len; | ||
| 694 | return len; | ||
| 695 | } | ||
| 696 | |||
| 697 | static size_t | ||
| 698 | skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) | ||
| 699 | { | ||
| 700 | unsigned int csum2, pos; | ||
| 701 | |||
| 702 | if (len > desc->count) | ||
| 703 | len = desc->count; | ||
| 704 | pos = desc->offset; | ||
| 705 | csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); | ||
| 706 | desc->csum = csum_block_add(desc->csum, csum2, pos); | ||
| 707 | desc->count -= len; | ||
| 708 | desc->offset += len; | ||
| 709 | return len; | ||
| 710 | } | ||
| 711 | |||
| 712 | /* | ||
| 713 | * We have set things up such that we perform the checksum of the UDP | ||
| 714 | * packet in parallel with the copies into the RPC client iovec. -DaveM | ||
| 715 | */ | ||
| 716 | int | ||
| 717 | csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | ||
| 718 | { | ||
| 719 | skb_reader_t desc; | ||
| 720 | |||
| 721 | desc.skb = skb; | ||
| 722 | desc.offset = sizeof(struct udphdr); | ||
| 723 | desc.count = skb->len - desc.offset; | ||
| 724 | |||
| 725 | if (skb->ip_summed == CHECKSUM_UNNECESSARY) | ||
| 726 | goto no_checksum; | ||
| 727 | |||
| 728 | desc.csum = csum_partial(skb->data, desc.offset, skb->csum); | ||
| 729 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) | ||
| 730 | return -1; | ||
| 731 | if (desc.offset != skb->len) { | ||
| 732 | unsigned int csum2; | ||
| 733 | csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); | ||
| 734 | desc.csum = csum_block_add(desc.csum, csum2, desc.offset); | ||
| 735 | } | ||
| 736 | if (desc.count) | ||
| 737 | return -1; | ||
| 738 | if ((unsigned short)csum_fold(desc.csum)) | ||
| 739 | return -1; | ||
| 740 | return 0; | ||
| 741 | no_checksum: | ||
| 742 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) | ||
| 743 | return -1; | ||
| 744 | if (desc.count) | ||
| 745 | return -1; | ||
| 746 | return 0; | ||
| 747 | } | ||
| 748 | |||
| 749 | /* | ||
| 750 | * Input handler for RPC replies. Called from a bottom half and hence | ||
| 751 | * atomic. | ||
| 752 | */ | ||
| 753 | static void | ||
| 754 | udp_data_ready(struct sock *sk, int len) | ||
| 755 | { | ||
| 756 | struct rpc_task *task; | ||
| 757 | struct rpc_xprt *xprt; | ||
| 758 | struct rpc_rqst *rovr; | ||
| 759 | struct sk_buff *skb; | ||
| 760 | int err, repsize, copied; | ||
| 761 | u32 _xid, *xp; | ||
| 762 | |||
| 763 | read_lock(&sk->sk_callback_lock); | ||
| 764 | dprintk("RPC: udp_data_ready...\n"); | ||
| 765 | if (!(xprt = xprt_from_sock(sk))) { | ||
| 766 | printk("RPC: udp_data_ready request not found!\n"); | ||
| 767 | goto out; | ||
| 768 | } | ||
| 769 | |||
| 770 | dprintk("RPC: udp_data_ready client %p\n", xprt); | ||
| 771 | |||
| 772 | if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) | ||
| 773 | goto out; | ||
| 774 | |||
| 775 | if (xprt->shutdown) | ||
| 776 | goto dropit; | ||
| 777 | |||
| 778 | repsize = skb->len - sizeof(struct udphdr); | ||
| 779 | if (repsize < 4) { | ||
| 780 | printk("RPC: impossible RPC reply size %d!\n", repsize); | ||
| 781 | goto dropit; | ||
| 782 | } | ||
| 783 | |||
| 784 | /* Copy the XID from the skb... */ | ||
| 785 | xp = skb_header_pointer(skb, sizeof(struct udphdr), | ||
| 786 | sizeof(_xid), &_xid); | ||
| 787 | if (xp == NULL) | ||
| 788 | goto dropit; | ||
| 789 | |||
| 790 | /* Look up and lock the request corresponding to the given XID */ | ||
| 791 | spin_lock(&xprt->sock_lock); | ||
| 792 | rovr = xprt_lookup_rqst(xprt, *xp); | ||
| 793 | if (!rovr) | ||
| 794 | goto out_unlock; | ||
| 795 | task = rovr->rq_task; | ||
| 796 | |||
| 797 | dprintk("RPC: %4d received reply\n", task->tk_pid); | ||
| 798 | |||
| 799 | if ((copied = rovr->rq_private_buf.buflen) > repsize) | ||
| 800 | copied = repsize; | ||
| 801 | |||
| 802 | /* Suck it into the iovec, verify checksum if not done by hw. */ | ||
| 803 | if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) | ||
| 804 | goto out_unlock; | ||
| 805 | |||
| 806 | /* Something worked... */ | ||
| 807 | dst_confirm(skb->dst); | ||
| 808 | |||
| 809 | xprt_complete_rqst(xprt, rovr, copied); | ||
| 810 | |||
| 811 | out_unlock: | ||
| 812 | spin_unlock(&xprt->sock_lock); | ||
| 813 | dropit: | ||
| 814 | skb_free_datagram(sk, skb); | ||
| 815 | out: | ||
| 816 | read_unlock(&sk->sk_callback_lock); | ||
| 817 | } | ||
| 818 | |||
| 819 | /* | ||
| 820 | * Copy from an skb into memory and shrink the skb. | ||
| 821 | */ | ||
| 822 | static inline size_t | ||
| 823 | tcp_copy_data(skb_reader_t *desc, void *p, size_t len) | ||
| 824 | { | ||
| 825 | if (len > desc->count) | ||
| 826 | len = desc->count; | ||
| 827 | if (skb_copy_bits(desc->skb, desc->offset, p, len)) { | ||
| 828 | dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", | ||
| 829 | len, desc->count); | ||
| 830 | return 0; | ||
| 831 | } | ||
| 832 | desc->offset += len; | ||
| 833 | desc->count -= len; | ||
| 834 | dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", | ||
| 835 | len, desc->count); | ||
| 836 | return len; | ||
| 837 | } | ||
| 838 | |||
| 839 | /* | ||
| 840 | * TCP read fragment marker | ||
| 841 | */ | ||
| 842 | static inline void | ||
| 843 | tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
| 844 | { | ||
| 845 | size_t len, used; | ||
| 846 | char *p; | ||
| 847 | |||
| 848 | p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; | ||
| 849 | len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; | ||
| 850 | used = tcp_copy_data(desc, p, len); | ||
| 851 | xprt->tcp_offset += used; | ||
| 852 | if (used != len) | ||
| 853 | return; | ||
| 854 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | ||
| 855 | if (xprt->tcp_reclen & 0x80000000) | ||
| 856 | xprt->tcp_flags |= XPRT_LAST_FRAG; | ||
| 857 | else | ||
| 858 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | ||
| 859 | xprt->tcp_reclen &= 0x7fffffff; | ||
| 860 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | ||
| 861 | xprt->tcp_offset = 0; | ||
| 862 | /* Sanity check of the record length */ | ||
| 863 | if (xprt->tcp_reclen < 4) { | ||
| 864 | printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); | ||
| 865 | xprt_disconnect(xprt); | ||
| 866 | } | ||
| 867 | dprintk("RPC: reading TCP record fragment of length %d\n", | ||
| 868 | xprt->tcp_reclen); | ||
| 869 | } | ||
| 870 | |||
| 871 | static void | ||
| 872 | tcp_check_recm(struct rpc_xprt *xprt) | ||
| 873 | { | ||
| 874 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", | ||
| 875 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); | ||
| 876 | if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
| 877 | xprt->tcp_flags |= XPRT_COPY_RECM; | ||
| 878 | xprt->tcp_offset = 0; | ||
| 879 | if (xprt->tcp_flags & XPRT_LAST_FRAG) { | ||
| 880 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 881 | xprt->tcp_flags |= XPRT_COPY_XID; | ||
| 882 | xprt->tcp_copied = 0; | ||
| 883 | } | ||
| 884 | } | ||
| 885 | } | ||
| 886 | |||
| 887 | /* | ||
| 888 | * TCP read xid | ||
| 889 | */ | ||
| 890 | static inline void | ||
| 891 | tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
| 892 | { | ||
| 893 | size_t len, used; | ||
| 894 | char *p; | ||
| 895 | |||
| 896 | len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; | ||
| 897 | dprintk("RPC: reading XID (%Zu bytes)\n", len); | ||
| 898 | p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; | ||
| 899 | used = tcp_copy_data(desc, p, len); | ||
| 900 | xprt->tcp_offset += used; | ||
| 901 | if (used != len) | ||
| 902 | return; | ||
| 903 | xprt->tcp_flags &= ~XPRT_COPY_XID; | ||
| 904 | xprt->tcp_flags |= XPRT_COPY_DATA; | ||
| 905 | xprt->tcp_copied = 4; | ||
| 906 | dprintk("RPC: reading reply for XID %08x\n", | ||
| 907 | ntohl(xprt->tcp_xid)); | ||
| 908 | tcp_check_recm(xprt); | ||
| 909 | } | ||
| 910 | |||
| 911 | /* | ||
| 912 | * TCP read and complete request | ||
| 913 | */ | ||
| 914 | static inline void | ||
| 915 | tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
| 916 | { | ||
| 917 | struct rpc_rqst *req; | ||
| 918 | struct xdr_buf *rcvbuf; | ||
| 919 | size_t len; | ||
| 920 | ssize_t r; | ||
| 921 | |||
| 922 | /* Find and lock the request corresponding to this xid */ | ||
| 923 | spin_lock(&xprt->sock_lock); | ||
| 924 | req = xprt_lookup_rqst(xprt, xprt->tcp_xid); | ||
| 925 | if (!req) { | ||
| 926 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 927 | dprintk("RPC: XID %08x request not found!\n", | ||
| 928 | ntohl(xprt->tcp_xid)); | ||
| 929 | spin_unlock(&xprt->sock_lock); | ||
| 930 | return; | ||
| 931 | } | ||
| 932 | |||
| 933 | rcvbuf = &req->rq_private_buf; | ||
| 934 | len = desc->count; | ||
| 935 | if (len > xprt->tcp_reclen - xprt->tcp_offset) { | ||
| 936 | skb_reader_t my_desc; | ||
| 937 | |||
| 938 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
| 939 | memcpy(&my_desc, desc, sizeof(my_desc)); | ||
| 940 | my_desc.count = len; | ||
| 941 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
| 942 | &my_desc, tcp_copy_data); | ||
| 943 | desc->count -= r; | ||
| 944 | desc->offset += r; | ||
| 945 | } else | ||
| 946 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
| 947 | desc, tcp_copy_data); | ||
| 948 | |||
| 949 | if (r > 0) { | ||
| 950 | xprt->tcp_copied += r; | ||
| 951 | xprt->tcp_offset += r; | ||
| 952 | } | ||
| 953 | if (r != len) { | ||
| 954 | /* Error when copying to the receive buffer, | ||
| 955 | * usually because we weren't able to allocate | ||
| 956 | * additional buffer pages. All we can do now | ||
| 957 | * is turn off XPRT_COPY_DATA, so the request | ||
| 958 | * will not receive any additional updates, | ||
| 959 | * and time out. | ||
| 960 | * Any remaining data from this record will | ||
| 961 | * be discarded. | ||
| 962 | */ | ||
| 963 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 964 | dprintk("RPC: XID %08x truncated request\n", | ||
| 965 | ntohl(xprt->tcp_xid)); | ||
| 966 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
| 967 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
| 968 | goto out; | ||
| 969 | } | ||
| 970 | |||
| 971 | dprintk("RPC: XID %08x read %Zd bytes\n", | ||
| 972 | ntohl(xprt->tcp_xid), r); | ||
| 973 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
| 974 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
| 975 | |||
| 976 | if (xprt->tcp_copied == req->rq_private_buf.buflen) | ||
| 977 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 978 | else if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
| 979 | if (xprt->tcp_flags & XPRT_LAST_FRAG) | ||
| 980 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 981 | } | ||
| 982 | |||
| 983 | out: | ||
| 984 | if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { | ||
| 985 | dprintk("RPC: %4d received reply complete\n", | ||
| 986 | req->rq_task->tk_pid); | ||
| 987 | xprt_complete_rqst(xprt, req, xprt->tcp_copied); | ||
| 988 | } | ||
| 989 | spin_unlock(&xprt->sock_lock); | ||
| 990 | tcp_check_recm(xprt); | ||
| 991 | } | ||
| 992 | |||
| 993 | /* | ||
| 994 | * TCP discard extra bytes from a short read | ||
| 995 | */ | ||
| 996 | static inline void | ||
| 997 | tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
| 998 | { | ||
| 999 | size_t len; | ||
| 1000 | |||
| 1001 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
| 1002 | if (len > desc->count) | ||
| 1003 | len = desc->count; | ||
| 1004 | desc->count -= len; | ||
| 1005 | desc->offset += len; | ||
| 1006 | xprt->tcp_offset += len; | ||
| 1007 | dprintk("RPC: discarded %Zu bytes\n", len); | ||
| 1008 | tcp_check_recm(xprt); | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | /* | ||
| 1012 | * TCP record receive routine | ||
| 1013 | * We first have to grab the record marker, then the XID, then the data. | ||
| 1014 | */ | 625 | */ |
| 1015 | static int | 626 | void xprt_update_rtt(struct rpc_task *task) |
| 1016 | tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, | ||
| 1017 | unsigned int offset, size_t len) | ||
| 1018 | { | ||
| 1019 | struct rpc_xprt *xprt = rd_desc->arg.data; | ||
| 1020 | skb_reader_t desc = { | ||
| 1021 | .skb = skb, | ||
| 1022 | .offset = offset, | ||
| 1023 | .count = len, | ||
| 1024 | .csum = 0 | ||
| 1025 | }; | ||
| 1026 | |||
| 1027 | dprintk("RPC: tcp_data_recv\n"); | ||
| 1028 | do { | ||
| 1029 | /* Read in a new fragment marker if necessary */ | ||
| 1030 | /* Can we ever really expect to get completely empty fragments? */ | ||
| 1031 | if (xprt->tcp_flags & XPRT_COPY_RECM) { | ||
| 1032 | tcp_read_fraghdr(xprt, &desc); | ||
| 1033 | continue; | ||
| 1034 | } | ||
| 1035 | /* Read in the xid if necessary */ | ||
| 1036 | if (xprt->tcp_flags & XPRT_COPY_XID) { | ||
| 1037 | tcp_read_xid(xprt, &desc); | ||
| 1038 | continue; | ||
| 1039 | } | ||
| 1040 | /* Read in the request data */ | ||
| 1041 | if (xprt->tcp_flags & XPRT_COPY_DATA) { | ||
| 1042 | tcp_read_request(xprt, &desc); | ||
| 1043 | continue; | ||
| 1044 | } | ||
| 1045 | /* Skip over any trailing bytes on short reads */ | ||
| 1046 | tcp_read_discard(xprt, &desc); | ||
| 1047 | } while (desc.count); | ||
| 1048 | dprintk("RPC: tcp_data_recv done\n"); | ||
| 1049 | return len - desc.count; | ||
| 1050 | } | ||
| 1051 | |||
| 1052 | static void tcp_data_ready(struct sock *sk, int bytes) | ||
| 1053 | { | 627 | { |
| 1054 | struct rpc_xprt *xprt; | 628 | struct rpc_rqst *req = task->tk_rqstp; |
| 1055 | read_descriptor_t rd_desc; | 629 | struct rpc_rtt *rtt = task->tk_client->cl_rtt; |
| 1056 | 630 | unsigned timer = task->tk_msg.rpc_proc->p_timer; | |
| 1057 | read_lock(&sk->sk_callback_lock); | ||
| 1058 | dprintk("RPC: tcp_data_ready...\n"); | ||
| 1059 | if (!(xprt = xprt_from_sock(sk))) { | ||
| 1060 | printk("RPC: tcp_data_ready socket info not found!\n"); | ||
| 1061 | goto out; | ||
| 1062 | } | ||
| 1063 | if (xprt->shutdown) | ||
| 1064 | goto out; | ||
| 1065 | |||
| 1066 | /* We use rd_desc to pass struct xprt to tcp_data_recv */ | ||
| 1067 | rd_desc.arg.data = xprt; | ||
| 1068 | rd_desc.count = 65536; | ||
| 1069 | tcp_read_sock(sk, &rd_desc, tcp_data_recv); | ||
| 1070 | out: | ||
| 1071 | read_unlock(&sk->sk_callback_lock); | ||
| 1072 | } | ||
| 1073 | |||
| 1074 | static void | ||
| 1075 | tcp_state_change(struct sock *sk) | ||
| 1076 | { | ||
| 1077 | struct rpc_xprt *xprt; | ||
| 1078 | 631 | ||
| 1079 | read_lock(&sk->sk_callback_lock); | 632 | if (timer) { |
| 1080 | if (!(xprt = xprt_from_sock(sk))) | 633 | if (req->rq_ntrans == 1) |
| 1081 | goto out; | 634 | rpc_update_rtt(rtt, timer, |
| 1082 | dprintk("RPC: tcp_state_change client %p...\n", xprt); | 635 | (long)jiffies - req->rq_xtime); |
| 1083 | dprintk("RPC: state %x conn %d dead %d zapped %d\n", | 636 | rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); |
| 1084 | sk->sk_state, xprt_connected(xprt), | ||
| 1085 | sock_flag(sk, SOCK_DEAD), | ||
| 1086 | sock_flag(sk, SOCK_ZAPPED)); | ||
| 1087 | |||
| 1088 | switch (sk->sk_state) { | ||
| 1089 | case TCP_ESTABLISHED: | ||
| 1090 | spin_lock_bh(&xprt->sock_lock); | ||
| 1091 | if (!xprt_test_and_set_connected(xprt)) { | ||
| 1092 | /* Reset TCP record info */ | ||
| 1093 | xprt->tcp_offset = 0; | ||
| 1094 | xprt->tcp_reclen = 0; | ||
| 1095 | xprt->tcp_copied = 0; | ||
| 1096 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | ||
| 1097 | rpc_wake_up(&xprt->pending); | ||
| 1098 | } | ||
| 1099 | spin_unlock_bh(&xprt->sock_lock); | ||
| 1100 | break; | ||
| 1101 | case TCP_SYN_SENT: | ||
| 1102 | case TCP_SYN_RECV: | ||
| 1103 | break; | ||
| 1104 | default: | ||
| 1105 | xprt_disconnect(xprt); | ||
| 1106 | break; | ||
| 1107 | } | 637 | } |
| 1108 | out: | ||
| 1109 | read_unlock(&sk->sk_callback_lock); | ||
| 1110 | } | 638 | } |
| 1111 | 639 | ||
| 1112 | /* | 640 | /** |
| 1113 | * Called when more output buffer space is available for this socket. | 641 | * xprt_complete_rqst - called when reply processing is complete |
| 1114 | * We try not to wake our writers until they can make "significant" | 642 | * @task: RPC request that recently completed |
| 1115 | * progress, otherwise we'll waste resources thrashing sock_sendmsg | 643 | * @copied: actual number of bytes received from the transport |
| 1116 | * with a bunch of small requests. | 644 | * |
| 645 | * Caller holds transport lock. | ||
| 1117 | */ | 646 | */ |
| 1118 | static void | 647 | void xprt_complete_rqst(struct rpc_task *task, int copied) |
| 1119 | xprt_write_space(struct sock *sk) | ||
| 1120 | { | 648 | { |
| 1121 | struct rpc_xprt *xprt; | 649 | struct rpc_rqst *req = task->tk_rqstp; |
| 1122 | struct socket *sock; | ||
| 1123 | |||
| 1124 | read_lock(&sk->sk_callback_lock); | ||
| 1125 | if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) | ||
| 1126 | goto out; | ||
| 1127 | if (xprt->shutdown) | ||
| 1128 | goto out; | ||
| 1129 | |||
| 1130 | /* Wait until we have enough socket memory */ | ||
| 1131 | if (xprt->stream) { | ||
| 1132 | /* from net/core/stream.c:sk_stream_write_space */ | ||
| 1133 | if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) | ||
| 1134 | goto out; | ||
| 1135 | } else { | ||
| 1136 | /* from net/core/sock.c:sock_def_write_space */ | ||
| 1137 | if (!sock_writeable(sk)) | ||
| 1138 | goto out; | ||
| 1139 | } | ||
| 1140 | 650 | ||
| 1141 | if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) | 651 | dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", |
| 1142 | goto out; | 652 | task->tk_pid, ntohl(req->rq_xid), copied); |
| 1143 | 653 | ||
| 1144 | spin_lock_bh(&xprt->sock_lock); | 654 | list_del_init(&req->rq_list); |
| 1145 | if (xprt->snd_task) | 655 | req->rq_received = req->rq_private_buf.len = copied; |
| 1146 | rpc_wake_up_task(xprt->snd_task); | 656 | rpc_wake_up_task(task); |
| 1147 | spin_unlock_bh(&xprt->sock_lock); | ||
| 1148 | out: | ||
| 1149 | read_unlock(&sk->sk_callback_lock); | ||
| 1150 | } | 657 | } |
| 1151 | 658 | ||
| 1152 | /* | 659 | static void xprt_timer(struct rpc_task *task) |
| 1153 | * RPC receive timeout handler. | ||
| 1154 | */ | ||
| 1155 | static void | ||
| 1156 | xprt_timer(struct rpc_task *task) | ||
| 1157 | { | 660 | { |
| 1158 | struct rpc_rqst *req = task->tk_rqstp; | 661 | struct rpc_rqst *req = task->tk_rqstp; |
| 1159 | struct rpc_xprt *xprt = req->rq_xprt; | 662 | struct rpc_xprt *xprt = req->rq_xprt; |
| 1160 | 663 | ||
| 1161 | spin_lock(&xprt->sock_lock); | 664 | dprintk("RPC: %4d xprt_timer\n", task->tk_pid); |
| 1162 | if (req->rq_received) | ||
| 1163 | goto out; | ||
| 1164 | |||
| 1165 | xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); | ||
| 1166 | __xprt_put_cong(xprt, req); | ||
| 1167 | 665 | ||
| 1168 | dprintk("RPC: %4d xprt_timer (%s request)\n", | 666 | spin_lock(&xprt->transport_lock); |
| 1169 | task->tk_pid, req ? "pending" : "backlogged"); | 667 | if (!req->rq_received) { |
| 1170 | 668 | if (xprt->ops->timer) | |
| 1171 | task->tk_status = -ETIMEDOUT; | 669 | xprt->ops->timer(task); |
| 1172 | out: | 670 | task->tk_status = -ETIMEDOUT; |
| 671 | } | ||
| 1173 | task->tk_timeout = 0; | 672 | task->tk_timeout = 0; |
| 1174 | rpc_wake_up_task(task); | 673 | rpc_wake_up_task(task); |
| 1175 | spin_unlock(&xprt->sock_lock); | 674 | spin_unlock(&xprt->transport_lock); |
| 1176 | } | 675 | } |
| 1177 | 676 | ||
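xprt_timer now delegates per-transport work to an optional ->timer hook. How a congestion-controlled transport might use it (the function name is hypothetical, but the UDP transport in this series does essentially this):

        static void demo_udp_timer(struct rpc_task *task)
        {
                /* Feed the timeout back into the VJ estimator, halving cwnd. */
                xprt_adjust_cwnd(task, -ETIMEDOUT);
        }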
| 1178 | /* | 677 | /** |
| 1179 | * Place the actual RPC call. | 678 | * xprt_prepare_transmit - reserve the transport before sending a request |
| 1180 | * We have to copy the iovec because sendmsg fiddles with its contents. | 679 | * @task: RPC task about to send a request |
| 680 | * | ||
| 1181 | */ | 681 | */ |
| 1182 | int | 682 | int xprt_prepare_transmit(struct rpc_task *task) |
| 1183 | xprt_prepare_transmit(struct rpc_task *task) | ||
| 1184 | { | 683 | { |
| 1185 | struct rpc_rqst *req = task->tk_rqstp; | 684 | struct rpc_rqst *req = task->tk_rqstp; |
| 1186 | struct rpc_xprt *xprt = req->rq_xprt; | 685 | struct rpc_xprt *xprt = req->rq_xprt; |
| @@ -1191,12 +690,12 @@ xprt_prepare_transmit(struct rpc_task *task) | |||
| 1191 | if (xprt->shutdown) | 690 | if (xprt->shutdown) |
| 1192 | return -EIO; | 691 | return -EIO; |
| 1193 | 692 | ||
| 1194 | spin_lock_bh(&xprt->sock_lock); | 693 | spin_lock_bh(&xprt->transport_lock); |
| 1195 | if (req->rq_received && !req->rq_bytes_sent) { | 694 | if (req->rq_received && !req->rq_bytes_sent) { |
| 1196 | err = req->rq_received; | 695 | err = req->rq_received; |
| 1197 | goto out_unlock; | 696 | goto out_unlock; |
| 1198 | } | 697 | } |
| 1199 | if (!__xprt_lock_write(xprt, task)) { | 698 | if (!xprt->ops->reserve_xprt(task)) { |
| 1200 | err = -EAGAIN; | 699 | err = -EAGAIN; |
| 1201 | goto out_unlock; | 700 | goto out_unlock; |
| 1202 | } | 701 | } |
| @@ -1206,39 +705,42 @@ xprt_prepare_transmit(struct rpc_task *task) | |||
| 1206 | goto out_unlock; | 705 | goto out_unlock; |
| 1207 | } | 706 | } |
| 1208 | out_unlock: | 707 | out_unlock: |
| 1209 | spin_unlock_bh(&xprt->sock_lock); | 708 | spin_unlock_bh(&xprt->transport_lock); |
| 1210 | return err; | 709 | return err; |
| 1211 | } | 710 | } |
| 1212 | 711 | ||
| 1213 | void | 712 | void |
| 1214 | xprt_transmit(struct rpc_task *task) | 713 | xprt_abort_transmit(struct rpc_task *task) |
| 714 | { | ||
| 715 | struct rpc_xprt *xprt = task->tk_xprt; | ||
| 716 | |||
| 717 | xprt_release_write(xprt, task); | ||
| 718 | } | ||
| 719 | |||
| 720 | /** | ||
| 721 | * xprt_transmit - send an RPC request on a transport | ||
| 722 | * @task: controlling RPC task | ||
| 723 | * | ||
| 724 | * We have to copy the iovec because sendmsg fiddles with its contents. | ||
| 725 | */ | ||
| 726 | void xprt_transmit(struct rpc_task *task) | ||
| 1215 | { | 727 | { |
| 1216 | struct rpc_clnt *clnt = task->tk_client; | ||
| 1217 | struct rpc_rqst *req = task->tk_rqstp; | 728 | struct rpc_rqst *req = task->tk_rqstp; |
| 1218 | struct rpc_xprt *xprt = req->rq_xprt; | 729 | struct rpc_xprt *xprt = req->rq_xprt; |
| 1219 | int status, retry = 0; | 730 | int status; |
| 1220 | |||
| 1221 | 731 | ||
| 1222 | dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); | 732 | dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); |
| 1223 | 733 | ||
| 1224 | /* set up everything as needed. */ | ||
| 1225 | /* Write the record marker */ | ||
| 1226 | if (xprt->stream) { | ||
| 1227 | u32 *marker = req->rq_svec[0].iov_base; | ||
| 1228 | |||
| 1229 | *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); | ||
| 1230 | } | ||
| 1231 | |||
| 1232 | smp_rmb(); | 734 | smp_rmb(); |
| 1233 | if (!req->rq_received) { | 735 | if (!req->rq_received) { |
| 1234 | if (list_empty(&req->rq_list)) { | 736 | if (list_empty(&req->rq_list)) { |
| 1235 | spin_lock_bh(&xprt->sock_lock); | 737 | spin_lock_bh(&xprt->transport_lock); |
| 1236 | /* Update the softirq receive buffer */ | 738 | /* Update the softirq receive buffer */ |
| 1237 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, | 739 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, |
| 1238 | sizeof(req->rq_private_buf)); | 740 | sizeof(req->rq_private_buf)); |
| 1239 | /* Add request to the receive list */ | 741 | /* Add request to the receive list */ |
| 1240 | list_add_tail(&req->rq_list, &xprt->recv); | 742 | list_add_tail(&req->rq_list, &xprt->recv); |
| 1241 | spin_unlock_bh(&xprt->sock_lock); | 743 | spin_unlock_bh(&xprt->transport_lock); |
| 1242 | xprt_reset_majortimeo(req); | 744 | xprt_reset_majortimeo(req); |
| 1243 | /* Turn off autodisconnect */ | 745 | /* Turn off autodisconnect */ |
| 1244 | del_singleshot_timer_sync(&xprt->timer); | 746 | del_singleshot_timer_sync(&xprt->timer); |
| @@ -1246,40 +748,19 @@ xprt_transmit(struct rpc_task *task) | |||
| 1246 | } else if (!req->rq_bytes_sent) | 748 | } else if (!req->rq_bytes_sent) |
| 1247 | return; | 749 | return; |
| 1248 | 750 | ||
| 1249 | /* Continue transmitting the packet/record. We must be careful | 751 | status = xprt->ops->send_request(task); |
| 1250 | * to cope with writespace callbacks arriving _after_ we have | 752 | if (status == 0) { |
| 1251 | * called xprt_sendmsg(). | 753 | dprintk("RPC: %4d xmit complete\n", task->tk_pid); |
| 1252 | */ | 754 | spin_lock_bh(&xprt->transport_lock); |
| 1253 | while (1) { | 755 | xprt->ops->set_retrans_timeout(task); |
| 1254 | req->rq_xtime = jiffies; | 756 | /* Don't race with disconnect */ |
| 1255 | status = xprt_sendmsg(xprt, req); | 757 | if (!xprt_connected(xprt)) |
| 1256 | 758 | task->tk_status = -ENOTCONN; | |
| 1257 | if (status < 0) | 759 | else if (!req->rq_received) |
| 1258 | break; | 760 | rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); |
| 1259 | 761 | xprt->ops->release_xprt(xprt, task); | |
| 1260 | if (xprt->stream) { | 762 | spin_unlock_bh(&xprt->transport_lock); |
| 1261 | req->rq_bytes_sent += status; | 763 | return; |
| 1262 | |||
| 1263 | /* If we've sent the entire packet, immediately | ||
| 1264 | * reset the count of bytes sent. */ | ||
| 1265 | if (req->rq_bytes_sent >= req->rq_slen) { | ||
| 1266 | req->rq_bytes_sent = 0; | ||
| 1267 | goto out_receive; | ||
| 1268 | } | ||
| 1269 | } else { | ||
| 1270 | if (status >= req->rq_slen) | ||
| 1271 | goto out_receive; | ||
| 1272 | status = -EAGAIN; | ||
| 1273 | break; | ||
| 1274 | } | ||
| 1275 | |||
| 1276 | dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", | ||
| 1277 | task->tk_pid, req->rq_slen - req->rq_bytes_sent, | ||
| 1278 | req->rq_slen); | ||
| 1279 | |||
| 1280 | status = -EAGAIN; | ||
| 1281 | if (retry++ > 50) | ||
| 1282 | break; | ||
| 1283 | } | 764 | } |
| 1284 | 765 | ||
| 1285 | /* Note: at this point, task->tk_sleeping has not yet been set, | 766 | /* Note: at this point, task->tk_sleeping has not yet been set, |
| @@ -1289,60 +770,19 @@ xprt_transmit(struct rpc_task *task) | |||
| 1289 | task->tk_status = status; | 770 | task->tk_status = status; |
| 1290 | 771 | ||
| 1291 | switch (status) { | 772 | switch (status) { |
| 1292 | case -EAGAIN: | ||
| 1293 | if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { | ||
| 1294 | /* Protect against races with xprt_write_space */ | ||
| 1295 | spin_lock_bh(&xprt->sock_lock); | ||
| 1296 | /* Don't race with disconnect */ | ||
| 1297 | if (!xprt_connected(xprt)) | ||
| 1298 | task->tk_status = -ENOTCONN; | ||
| 1299 | else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { | ||
| 1300 | task->tk_timeout = req->rq_timeout; | ||
| 1301 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); | ||
| 1302 | } | ||
| 1303 | spin_unlock_bh(&xprt->sock_lock); | ||
| 1304 | return; | ||
| 1305 | } | ||
| 1306 | /* Keep holding the socket if it is blocked */ | ||
| 1307 | rpc_delay(task, HZ>>4); | ||
| 1308 | return; | ||
| 1309 | case -ECONNREFUSED: | 773 | case -ECONNREFUSED: |
| 1310 | task->tk_timeout = RPC_REESTABLISH_TIMEOUT; | ||
| 1311 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | 774 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); |
| 775 | case -EAGAIN: | ||
| 1312 | case -ENOTCONN: | 776 | case -ENOTCONN: |
| 1313 | return; | 777 | return; |
| 1314 | default: | 778 | default: |
| 1315 | if (xprt->stream) | 779 | break; |
| 1316 | xprt_disconnect(xprt); | ||
| 1317 | } | 780 | } |
| 1318 | xprt_release_write(xprt, task); | 781 | xprt_release_write(xprt, task); |
| 1319 | return; | 782 | return; |
| 1320 | out_receive: | ||
| 1321 | dprintk("RPC: %4d xmit complete\n", task->tk_pid); | ||
| 1322 | /* Set the task's receive timeout value */ | ||
| 1323 | spin_lock_bh(&xprt->sock_lock); | ||
| 1324 | if (!xprt->nocong) { | ||
| 1325 | int timer = task->tk_msg.rpc_proc->p_timer; | ||
| 1326 | task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); | ||
| 1327 | task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries; | ||
| 1328 | if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0) | ||
| 1329 | task->tk_timeout = xprt->timeout.to_maxval; | ||
| 1330 | } else | ||
| 1331 | task->tk_timeout = req->rq_timeout; | ||
| 1332 | /* Don't race with disconnect */ | ||
| 1333 | if (!xprt_connected(xprt)) | ||
| 1334 | task->tk_status = -ENOTCONN; | ||
| 1335 | else if (!req->rq_received) | ||
| 1336 | rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); | ||
| 1337 | __xprt_release_write(xprt, task); | ||
| 1338 | spin_unlock_bh(&xprt->sock_lock); | ||
| 1339 | } | 783 | } |
| 1340 | 784 | ||
| 1341 | /* | 785 | static inline void do_xprt_reserve(struct rpc_task *task) |
| 1342 | * Reserve an RPC call slot. | ||
| 1343 | */ | ||
| 1344 | static inline void | ||
| 1345 | do_xprt_reserve(struct rpc_task *task) | ||
| 1346 | { | 786 | { |
| 1347 | struct rpc_xprt *xprt = task->tk_xprt; | 787 | struct rpc_xprt *xprt = task->tk_xprt; |
| 1348 | 788 | ||
| @@ -1362,22 +802,25 @@ do_xprt_reserve(struct rpc_task *task) | |||
| 1362 | rpc_sleep_on(&xprt->backlog, task, NULL, NULL); | 802 | rpc_sleep_on(&xprt->backlog, task, NULL, NULL); |
| 1363 | } | 803 | } |
| 1364 | 804 | ||
| 1365 | void | 805 | /** |
| 1366 | xprt_reserve(struct rpc_task *task) | 806 | * xprt_reserve - allocate an RPC request slot |
| 807 | * @task: RPC task requesting a slot allocation | ||
| 808 | * | ||
| 809 | * If no more slots are available, place the task on the transport's | ||
| 810 | * backlog queue. | ||
| 811 | */ | ||
| 812 | void xprt_reserve(struct rpc_task *task) | ||
| 1367 | { | 813 | { |
| 1368 | struct rpc_xprt *xprt = task->tk_xprt; | 814 | struct rpc_xprt *xprt = task->tk_xprt; |
| 1369 | 815 | ||
| 1370 | task->tk_status = -EIO; | 816 | task->tk_status = -EIO; |
| 1371 | if (!xprt->shutdown) { | 817 | if (!xprt->shutdown) { |
| 1372 | spin_lock(&xprt->xprt_lock); | 818 | spin_lock(&xprt->reserve_lock); |
| 1373 | do_xprt_reserve(task); | 819 | do_xprt_reserve(task); |
| 1374 | spin_unlock(&xprt->xprt_lock); | 820 | spin_unlock(&xprt->reserve_lock); |
| 1375 | } | 821 | } |
| 1376 | } | 822 | } |
| 1377 | 823 | ||
| 1378 | /* | ||
| 1379 | * Allocate a 'unique' XID | ||
| 1380 | */ | ||
| 1381 | static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) | 824 | static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) |
| 1382 | { | 825 | { |
| 1383 | return xprt->xid++; | 826 | return xprt->xid++; |
| @@ -1388,11 +831,7 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt) | |||
| 1388 | get_random_bytes(&xprt->xid, sizeof(xprt->xid)); | 831 | get_random_bytes(&xprt->xid, sizeof(xprt->xid)); |
| 1389 | } | 832 | } |
| 1390 | 833 | ||
| 1391 | /* | 834 | static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) |
| 1392 | * Initialize RPC request | ||
| 1393 | */ | ||
| 1394 | static void | ||
| 1395 | xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | ||
| 1396 | { | 835 | { |
| 1397 | struct rpc_rqst *req = task->tk_rqstp; | 836 | struct rpc_rqst *req = task->tk_rqstp; |
| 1398 | 837 | ||
| @@ -1400,128 +839,104 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | |||
| 1400 | req->rq_task = task; | 839 | req->rq_task = task; |
| 1401 | req->rq_xprt = xprt; | 840 | req->rq_xprt = xprt; |
| 1402 | req->rq_xid = xprt_alloc_xid(xprt); | 841 | req->rq_xid = xprt_alloc_xid(xprt); |
| 842 | req->rq_release_snd_buf = NULL; | ||
| 1403 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, | 843 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, |
| 1404 | req, ntohl(req->rq_xid)); | 844 | req, ntohl(req->rq_xid)); |
| 1405 | } | 845 | } |
| 1406 | 846 | ||
| 1407 | /* | 847 | /** |
| 1408 | * Release an RPC call slot | 848 | * xprt_release - release an RPC request slot |
| 849 | * @task: task which is finished with the slot | ||
| 850 | * | ||
| 1409 | */ | 851 | */ |
| 1410 | void | 852 | void xprt_release(struct rpc_task *task) |
| 1411 | xprt_release(struct rpc_task *task) | ||
| 1412 | { | 853 | { |
| 1413 | struct rpc_xprt *xprt = task->tk_xprt; | 854 | struct rpc_xprt *xprt = task->tk_xprt; |
| 1414 | struct rpc_rqst *req; | 855 | struct rpc_rqst *req; |
| 1415 | 856 | ||
| 1416 | if (!(req = task->tk_rqstp)) | 857 | if (!(req = task->tk_rqstp)) |
| 1417 | return; | 858 | return; |
| 1418 | spin_lock_bh(&xprt->sock_lock); | 859 | spin_lock_bh(&xprt->transport_lock); |
| 1419 | __xprt_release_write(xprt, task); | 860 | xprt->ops->release_xprt(xprt, task); |
| 1420 | __xprt_put_cong(xprt, req); | 861 | if (xprt->ops->release_request) |
| 862 | xprt->ops->release_request(task); | ||
| 1421 | if (!list_empty(&req->rq_list)) | 863 | if (!list_empty(&req->rq_list)) |
| 1422 | list_del(&req->rq_list); | 864 | list_del(&req->rq_list); |
| 1423 | xprt->last_used = jiffies; | 865 | xprt->last_used = jiffies; |
| 1424 | if (list_empty(&xprt->recv) && !xprt->shutdown) | 866 | if (list_empty(&xprt->recv) && !xprt->shutdown) |
| 1425 | mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); | 867 | mod_timer(&xprt->timer, |
| 1426 | spin_unlock_bh(&xprt->sock_lock); | 868 | xprt->last_used + xprt->idle_timeout); |
| 869 | spin_unlock_bh(&xprt->transport_lock); | ||
| 1427 | task->tk_rqstp = NULL; | 870 | task->tk_rqstp = NULL; |
| 871 | if (req->rq_release_snd_buf) | ||
| 872 | req->rq_release_snd_buf(req); | ||
| 1428 | memset(req, 0, sizeof(*req)); /* mark unused */ | 873 | memset(req, 0, sizeof(*req)); /* mark unused */ |
| 1429 | 874 | ||
| 1430 | dprintk("RPC: %4d release request %p\n", task->tk_pid, req); | 875 | dprintk("RPC: %4d release request %p\n", task->tk_pid, req); |
| 1431 | 876 | ||
| 1432 | spin_lock(&xprt->xprt_lock); | 877 | spin_lock(&xprt->reserve_lock); |
| 1433 | list_add(&req->rq_list, &xprt->free); | 878 | list_add(&req->rq_list, &xprt->free); |
| 1434 | xprt_clear_backlog(xprt); | 879 | rpc_wake_up_next(&xprt->backlog); |
| 1435 | spin_unlock(&xprt->xprt_lock); | 880 | spin_unlock(&xprt->reserve_lock); |
| 1436 | } | ||
| 1437 | |||
| 1438 | /* | ||
| 1439 | * Set default timeout parameters | ||
| 1440 | */ | ||
| 1441 | static void | ||
| 1442 | xprt_default_timeout(struct rpc_timeout *to, int proto) | ||
| 1443 | { | ||
| 1444 | if (proto == IPPROTO_UDP) | ||
| 1445 | xprt_set_timeout(to, 5, 5 * HZ); | ||
| 1446 | else | ||
| 1447 | xprt_set_timeout(to, 5, 60 * HZ); | ||
| 1448 | } | 881 | } |
| 1449 | 882 | ||
| 1450 | /* | 883 | /** |
| 1451 | * Set constant timeout | 884 | * xprt_set_timeout - set constant RPC timeout |
| 885 | * @to: RPC timeout parameters to set up | ||
| 886 | * @retr: number of retries | ||
| 887 | * @incr: amount of increase after each retry | ||
| 888 | * | ||
| 1452 | */ | 889 | */ |
| 1453 | void | 890 | void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) |
| 1454 | xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) | ||
| 1455 | { | 891 | { |
| 1456 | to->to_initval = | 892 | to->to_initval = |
| 1457 | to->to_increment = incr; | 893 | to->to_increment = incr; |
| 1458 | to->to_maxval = incr * retr; | 894 | to->to_maxval = to->to_initval + (incr * retr); |
| 1459 | to->to_retries = retr; | 895 | to->to_retries = retr; |
| 1460 | to->to_exponential = 0; | 896 | to->to_exponential = 0; |
| 1461 | } | 897 | } |
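[Editor's note] As a reading aid (not part of the patch), the revised to_maxval arithmetic worked through with assumed inputs; the call and values below are illustrative only:

	/* Hypothetical call: xprt_set_timeout(&to, 5, 60 * HZ)
	 *
	 *   to_initval = to_increment = 60 * HZ
	 *   to_maxval  = 60 * HZ + (60 * HZ * 5) = 360 * HZ
	 *   to_retries = 5
	 *
	 * The old code computed to_maxval = incr * retr = 300 * HZ.
	 */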
| 1462 | 898 | ||
| 1463 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | 899 | static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) |
| 1464 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
| 1465 | |||
| 1466 | /* | ||
| 1467 | * Initialize an RPC client | ||
| 1468 | */ | ||
| 1469 | static struct rpc_xprt * | ||
| 1470 | xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) | ||
| 1471 | { | 900 | { |
| 901 | int result; | ||
| 1472 | struct rpc_xprt *xprt; | 902 | struct rpc_xprt *xprt; |
| 1473 | unsigned int entries; | ||
| 1474 | size_t slot_table_size; | ||
| 1475 | struct rpc_rqst *req; | 903 | struct rpc_rqst *req; |
| 1476 | 904 | ||
| 1477 | dprintk("RPC: setting up %s transport...\n", | ||
| 1478 | proto == IPPROTO_UDP? "UDP" : "TCP"); | ||
| 1479 | |||
| 1480 | entries = (proto == IPPROTO_TCP)? | ||
| 1481 | xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; | ||
| 1482 | |||
| 1483 | if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) | 905 | if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) |
| 1484 | return ERR_PTR(-ENOMEM); | 906 | return ERR_PTR(-ENOMEM); |
| 1485 | memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ | 907 | memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ |
| 1486 | xprt->max_reqs = entries; | ||
| 1487 | slot_table_size = entries * sizeof(xprt->slot[0]); | ||
| 1488 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
| 1489 | if (xprt->slot == NULL) { | ||
| 1490 | kfree(xprt); | ||
| 1491 | return ERR_PTR(-ENOMEM); | ||
| 1492 | } | ||
| 1493 | memset(xprt->slot, 0, slot_table_size); | ||
| 1494 | 908 | ||
| 1495 | xprt->addr = *ap; | 909 | xprt->addr = *ap; |
| 1496 | xprt->prot = proto; | 910 | |
| 1497 | xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; | 911 | switch (proto) { |
| 1498 | if (xprt->stream) { | 912 | case IPPROTO_UDP: |
| 1499 | xprt->cwnd = RPC_MAXCWND(xprt); | 913 | result = xs_setup_udp(xprt, to); |
| 1500 | xprt->nocong = 1; | 914 | break; |
| 1501 | xprt->max_payload = (1U << 31) - 1; | 915 | case IPPROTO_TCP: |
| 1502 | } else { | 916 | result = xs_setup_tcp(xprt, to); |
| 1503 | xprt->cwnd = RPC_INITCWND; | 917 | break; |
| 1504 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | 918 | default: |
| 919 | printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", | ||
| 920 | proto); | ||
| 921 | result = -EIO; | ||
| 922 | break; | ||
| 923 | } | ||
| 924 | if (result) { | ||
| 925 | kfree(xprt); | ||
| 926 | return ERR_PTR(result); | ||
| 1505 | } | 927 | } |
| 1506 | spin_lock_init(&xprt->sock_lock); | 928 | |
| 1507 | spin_lock_init(&xprt->xprt_lock); | 929 | spin_lock_init(&xprt->transport_lock); |
| 1508 | init_waitqueue_head(&xprt->cong_wait); | 930 | spin_lock_init(&xprt->reserve_lock); |
| 1509 | 931 | ||
| 1510 | INIT_LIST_HEAD(&xprt->free); | 932 | INIT_LIST_HEAD(&xprt->free); |
| 1511 | INIT_LIST_HEAD(&xprt->recv); | 933 | INIT_LIST_HEAD(&xprt->recv); |
| 1512 | INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); | 934 | INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); |
| 1513 | INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); | ||
| 1514 | init_timer(&xprt->timer); | 935 | init_timer(&xprt->timer); |
| 1515 | xprt->timer.function = xprt_init_autodisconnect; | 936 | xprt->timer.function = xprt_init_autodisconnect; |
| 1516 | xprt->timer.data = (unsigned long) xprt; | 937 | xprt->timer.data = (unsigned long) xprt; |
| 1517 | xprt->last_used = jiffies; | 938 | xprt->last_used = jiffies; |
| 1518 | xprt->port = XPRT_MAX_RESVPORT; | 939 | xprt->cwnd = RPC_INITCWND; |
| 1519 | |||
| 1520 | /* Set timeout parameters */ | ||
| 1521 | if (to) { | ||
| 1522 | xprt->timeout = *to; | ||
| 1523 | } else | ||
| 1524 | xprt_default_timeout(&xprt->timeout, xprt->prot); | ||
| 1525 | 940 | ||
| 1526 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); | 941 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); |
| 1527 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); | 942 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); |
| @@ -1529,139 +944,25 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) | |||
| 1529 | rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); | 944 | rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); |
| 1530 | 945 | ||
| 1531 | /* initialize free list */ | 946 | /* initialize free list */ |
| 1532 | for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) | 947 | for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) |
| 1533 | list_add(&req->rq_list, &xprt->free); | 948 | list_add(&req->rq_list, &xprt->free); |
| 1534 | 949 | ||
| 1535 | xprt_init_xid(xprt); | 950 | xprt_init_xid(xprt); |
| 1536 | 951 | ||
| 1537 | /* Check whether we want to use a reserved port */ | ||
| 1538 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
| 1539 | |||
| 1540 | dprintk("RPC: created transport %p with %u slots\n", xprt, | 952 | dprintk("RPC: created transport %p with %u slots\n", xprt, |
| 1541 | xprt->max_reqs); | 953 | xprt->max_reqs); |
| 1542 | 954 | ||
| 1543 | return xprt; | 955 | return xprt; |
| 1544 | } | 956 | } |
| 1545 | 957 | ||
| 1546 | /* | 958 | /** |
| 1547 | * Bind to a reserved port | 959 | * xprt_create_proto - create an RPC client transport |
| 1548 | */ | 960 | * @proto: requested transport protocol |
| 1549 | static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | 961 | * @sap: remote peer's address |
| 1550 | { | 962 | * @to: timeout parameters for new transport |
| 1551 | struct sockaddr_in myaddr = { | 963 | * |
| 1552 | .sin_family = AF_INET, | ||
| 1553 | }; | ||
| 1554 | int err, port; | ||
| 1555 | |||
| 1556 | /* Were we already bound to a given port? Try to reuse it */ | ||
| 1557 | port = xprt->port; | ||
| 1558 | do { | ||
| 1559 | myaddr.sin_port = htons(port); | ||
| 1560 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | ||
| 1561 | sizeof(myaddr)); | ||
| 1562 | if (err == 0) { | ||
| 1563 | xprt->port = port; | ||
| 1564 | return 0; | ||
| 1565 | } | ||
| 1566 | if (--port == 0) | ||
| 1567 | port = XPRT_MAX_RESVPORT; | ||
| 1568 | } while (err == -EADDRINUSE && port != xprt->port); | ||
| 1569 | |||
| 1570 | printk("RPC: Can't bind to reserved port (%d).\n", -err); | ||
| 1571 | return err; | ||
| 1572 | } | ||
| 1573 | |||
| 1574 | static void | ||
| 1575 | xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) | ||
| 1576 | { | ||
| 1577 | struct sock *sk = sock->sk; | ||
| 1578 | |||
| 1579 | if (xprt->inet) | ||
| 1580 | return; | ||
| 1581 | |||
| 1582 | write_lock_bh(&sk->sk_callback_lock); | ||
| 1583 | sk->sk_user_data = xprt; | ||
| 1584 | xprt->old_data_ready = sk->sk_data_ready; | ||
| 1585 | xprt->old_state_change = sk->sk_state_change; | ||
| 1586 | xprt->old_write_space = sk->sk_write_space; | ||
| 1587 | if (xprt->prot == IPPROTO_UDP) { | ||
| 1588 | sk->sk_data_ready = udp_data_ready; | ||
| 1589 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
| 1590 | xprt_set_connected(xprt); | ||
| 1591 | } else { | ||
| 1592 | tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ | ||
| 1593 | sk->sk_data_ready = tcp_data_ready; | ||
| 1594 | sk->sk_state_change = tcp_state_change; | ||
| 1595 | xprt_clear_connected(xprt); | ||
| 1596 | } | ||
| 1597 | sk->sk_write_space = xprt_write_space; | ||
| 1598 | |||
| 1599 | /* Reset to new socket */ | ||
| 1600 | xprt->sock = sock; | ||
| 1601 | xprt->inet = sk; | ||
| 1602 | write_unlock_bh(&sk->sk_callback_lock); | ||
| 1603 | |||
| 1604 | return; | ||
| 1605 | } | ||
| 1606 | |||
| 1607 | /* | ||
| 1608 | * Set socket buffer length | ||
| 1609 | */ | ||
| 1610 | void | ||
| 1611 | xprt_sock_setbufsize(struct rpc_xprt *xprt) | ||
| 1612 | { | ||
| 1613 | struct sock *sk = xprt->inet; | ||
| 1614 | |||
| 1615 | if (xprt->stream) | ||
| 1616 | return; | ||
| 1617 | if (xprt->rcvsize) { | ||
| 1618 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
| 1619 | sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; | ||
| 1620 | } | ||
| 1621 | if (xprt->sndsize) { | ||
| 1622 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
| 1623 | sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; | ||
| 1624 | sk->sk_write_space(sk); | ||
| 1625 | } | ||
| 1626 | } | ||
| 1627 | |||
| 1628 | /* | ||
| 1629 | * Datastream sockets are created here, but xprt_connect will create | ||
| 1630 | * and connect stream sockets. | ||
| 1631 | */ | ||
| 1632 | static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) | ||
| 1633 | { | ||
| 1634 | struct socket *sock; | ||
| 1635 | int type, err; | ||
| 1636 | |||
| 1637 | dprintk("RPC: xprt_create_socket(%s %d)\n", | ||
| 1638 | (proto == IPPROTO_UDP)? "udp" : "tcp", proto); | ||
| 1639 | |||
| 1640 | type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | ||
| 1641 | |||
| 1642 | if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { | ||
| 1643 | printk("RPC: can't create socket (%d).\n", -err); | ||
| 1644 | return NULL; | ||
| 1645 | } | ||
| 1646 | |||
| 1647 | /* If the caller has the capability, bind to a reserved port */ | ||
| 1648 | if (resvport && xprt_bindresvport(xprt, sock) < 0) { | ||
| 1649 | printk("RPC: can't bind to reserved port.\n"); | ||
| 1650 | goto failed; | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | return sock; | ||
| 1654 | |||
| 1655 | failed: | ||
| 1656 | sock_release(sock); | ||
| 1657 | return NULL; | ||
| 1658 | } | ||
| 1659 | |||
| 1660 | /* | ||
| 1661 | * Create an RPC client transport given the protocol and peer address. | ||
| 1662 | */ | 964 | */ |
| 1663 | struct rpc_xprt * | 965 | struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) |
| 1664 | xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) | ||
| 1665 | { | 966 | { |
| 1666 | struct rpc_xprt *xprt; | 967 | struct rpc_xprt *xprt; |
| 1667 | 968 | ||
| @@ -1673,46 +974,26 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) | |||
| 1673 | return xprt; | 974 | return xprt; |
| 1674 | } | 975 | } |
| 1675 | 976 | ||
| 1676 | /* | 977 | static void xprt_shutdown(struct rpc_xprt *xprt) |
| 1677 | * Prepare for transport shutdown. | ||
| 1678 | */ | ||
| 1679 | static void | ||
| 1680 | xprt_shutdown(struct rpc_xprt *xprt) | ||
| 1681 | { | 978 | { |
| 1682 | xprt->shutdown = 1; | 979 | xprt->shutdown = 1; |
| 1683 | rpc_wake_up(&xprt->sending); | 980 | rpc_wake_up(&xprt->sending); |
| 1684 | rpc_wake_up(&xprt->resend); | 981 | rpc_wake_up(&xprt->resend); |
| 1685 | rpc_wake_up(&xprt->pending); | 982 | xprt_wake_pending_tasks(xprt, -EIO); |
| 1686 | rpc_wake_up(&xprt->backlog); | 983 | rpc_wake_up(&xprt->backlog); |
| 1687 | wake_up(&xprt->cong_wait); | ||
| 1688 | del_timer_sync(&xprt->timer); | 984 | del_timer_sync(&xprt->timer); |
| 1689 | |||
| 1690 | /* synchronously wait for connect worker to finish */ | ||
| 1691 | cancel_delayed_work(&xprt->sock_connect); | ||
| 1692 | flush_scheduled_work(); | ||
| 1693 | } | 985 | } |
| 1694 | 986 | ||
| 1695 | /* | 987 | /** |
| 1696 | * Clear the xprt backlog queue | 988 | * xprt_destroy - destroy an RPC transport, killing off all requests. |
| 1697 | */ | 989 | * @xprt: transport to destroy |
| 1698 | static int | 990 | * |
| 1699 | xprt_clear_backlog(struct rpc_xprt *xprt) { | ||
| 1700 | rpc_wake_up_next(&xprt->backlog); | ||
| 1701 | wake_up(&xprt->cong_wait); | ||
| 1702 | return 1; | ||
| 1703 | } | ||
| 1704 | |||
| 1705 | /* | ||
| 1706 | * Destroy an RPC transport, killing off all requests. | ||
| 1707 | */ | 991 | */ |
| 1708 | int | 992 | int xprt_destroy(struct rpc_xprt *xprt) |
| 1709 | xprt_destroy(struct rpc_xprt *xprt) | ||
| 1710 | { | 993 | { |
| 1711 | dprintk("RPC: destroying transport %p\n", xprt); | 994 | dprintk("RPC: destroying transport %p\n", xprt); |
| 1712 | xprt_shutdown(xprt); | 995 | xprt_shutdown(xprt); |
| 1713 | xprt_disconnect(xprt); | 996 | xprt->ops->destroy(xprt); |
| 1714 | xprt_close(xprt); | ||
| 1715 | kfree(xprt->slot); | ||
| 1716 | kfree(xprt); | 997 | kfree(xprt); |
| 1717 | 998 | ||
| 1718 | return 0; | 999 | return 0; |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c new file mode 100644 index 000000000000..2e1529217e65 --- /dev/null +++ b/net/sunrpc/xprtsock.c | |||
| @@ -0,0 +1,1252 @@ | |||
| 1 | /* | ||
| 2 | * linux/net/sunrpc/xprtsock.c | ||
| 3 | * | ||
| 4 | * Client-side transport implementation for sockets. | ||
| 5 | * | ||
| 6 | * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
| 7 | * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
| 8 | * TCP NFS related read + write fixes | ||
| 9 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
| 10 | * | ||
| 11 | * Rewrite of large parts of the code in order to stabilize TCP stuff. | ||
| 12 | * Fix behaviour when socket buffer is full. | ||
| 13 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
| 14 | * | ||
| 15 | * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> | ||
| 16 | */ | ||
| 17 | |||
| 18 | #include <linux/types.h> | ||
| 19 | #include <linux/slab.h> | ||
| 20 | #include <linux/capability.h> | ||
| 21 | #include <linux/sched.h> | ||
| 22 | #include <linux/pagemap.h> | ||
| 23 | #include <linux/errno.h> | ||
| 24 | #include <linux/socket.h> | ||
| 25 | #include <linux/in.h> | ||
| 26 | #include <linux/net.h> | ||
| 27 | #include <linux/mm.h> | ||
| 28 | #include <linux/udp.h> | ||
| 29 | #include <linux/tcp.h> | ||
| 30 | #include <linux/sunrpc/clnt.h> | ||
| 31 | #include <linux/file.h> | ||
| 32 | |||
| 33 | #include <net/sock.h> | ||
| 34 | #include <net/checksum.h> | ||
| 35 | #include <net/udp.h> | ||
| 36 | #include <net/tcp.h> | ||
| 37 | |||
| 38 | /* | ||
| 39 | * How many times to try sending a request on a socket before waiting | ||
| 40 | * for the socket buffer to clear. | ||
| 41 | */ | ||
| 42 | #define XS_SENDMSG_RETRY (10U) | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Timeout for an RPC UDP socket connect. UDP socket connects are | ||
| 46 | * synchronous, but we set a timeout anyway in case of resource | ||
| 47 | * exhaustion on the local host. | ||
| 48 | */ | ||
| 49 | #define XS_UDP_CONN_TO (5U * HZ) | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Wait duration for an RPC TCP connection to be established. Solaris | ||
| 53 | * NFS over TCP uses 60 seconds, for example, which is in line with how | ||
| 54 | * long a server takes to reboot. | ||
| 55 | */ | ||
| 56 | #define XS_TCP_CONN_TO (60U * HZ) | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Wait duration for a reply from the RPC portmapper. | ||
| 60 | */ | ||
| 61 | #define XS_BIND_TO (60U * HZ) | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Delay if a UDP socket connect error occurs. This is most likely some | ||
| 65 | * kind of resource problem on the local host. | ||
| 66 | */ | ||
| 67 | #define XS_UDP_REEST_TO (2U * HZ) | ||
| 68 | |||
| 69 | /* | ||
| 70 | * The reestablish timeout lets the client delay for a bit before attempting | ||
| 71 | * to reconnect to a server that just dropped the connection. | ||
| 72 | * | ||
| 73 | * We implement an exponential backoff when trying to reestablish a TCP | ||
| 74 | * transport connection with the server. Some servers like to drop a TCP | ||
| 75 | * connection when they are overworked, so we start with a short timeout and | ||
| 76 | * increase over time if the server is down or not responding. | ||
| 77 | */ | ||
| 78 | #define XS_TCP_INIT_REEST_TO (3U * HZ) | ||
| 79 | #define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) | ||
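[Editor's note] The consumer of these two constants is the connect path, which lies outside this hunk. A minimal sketch of the doubling-with-clamp backoff they imply, assuming the reestablish_timeout field used later in this file (see xs_tcp_state_change, which resets it to XS_TCP_INIT_REEST_TO on a successful connect):

	/* Sketch only: exponential backoff between TCP reconnect attempts,
	 * assuming xprt->reestablish_timeout as seen elsewhere in this file. */
	static void xs_backoff_sketch(struct rpc_xprt *xprt)
	{
		xprt->reestablish_timeout <<= 1;	/* double the delay */
		if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
			xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
	}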
| 80 | |||
| 81 | /* | ||
| 82 | * TCP idle timeout; client drops the transport socket if it is idle | ||
| 83 | * for this long. Note that we also timeout UDP sockets to prevent | ||
| 84 | * holding port numbers when there is no RPC traffic. | ||
| 85 | */ | ||
| 86 | #define XS_IDLE_DISC_TO (5U * 60 * HZ) | ||
| 87 | |||
| 88 | #ifdef RPC_DEBUG | ||
| 89 | # undef RPC_DEBUG_DATA | ||
| 90 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
| 91 | #endif | ||
| 92 | |||
| 93 | #ifdef RPC_DEBUG_DATA | ||
| 94 | static void xs_pktdump(char *msg, u32 *packet, unsigned int count) | ||
| 95 | { | ||
| 96 | u8 *buf = (u8 *) packet; | ||
| 97 | int j; | ||
| 98 | |||
| 99 | dprintk("RPC: %s\n", msg); | ||
| 100 | for (j = 0; j < count && j < 128; j += 4) { | ||
| 101 | if (!(j & 31)) { | ||
| 102 | if (j) | ||
| 103 | dprintk("\n"); | ||
| 104 | dprintk("0x%04x ", j); | ||
| 105 | } | ||
| 106 | dprintk("%02x%02x%02x%02x ", | ||
| 107 | buf[j], buf[j+1], buf[j+2], buf[j+3]); | ||
| 108 | } | ||
| 109 | dprintk("\n"); | ||
| 110 | } | ||
| 111 | #else | ||
| 112 | static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) | ||
| 113 | { | ||
| 114 | /* NOP */ | ||
| 115 | } | ||
| 116 | #endif | ||
| 117 | |||
| 118 | #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) | ||
| 119 | |||
| 120 | static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) | ||
| 121 | { | ||
| 122 | struct kvec iov = { | ||
| 123 | .iov_base = xdr->head[0].iov_base + base, | ||
| 124 | .iov_len = len - base, | ||
| 125 | }; | ||
| 126 | struct msghdr msg = { | ||
| 127 | .msg_name = addr, | ||
| 128 | .msg_namelen = addrlen, | ||
| 129 | .msg_flags = XS_SENDMSG_FLAGS, | ||
| 130 | }; | ||
| 131 | |||
| 132 | if (xdr->len > len) | ||
| 133 | msg.msg_flags |= MSG_MORE; | ||
| 134 | |||
| 135 | if (likely(iov.iov_len)) | ||
| 136 | return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
| 137 | return kernel_sendmsg(sock, &msg, NULL, 0, 0); | ||
| 138 | } | ||
| 139 | |||
| 140 | static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len) | ||
| 141 | { | ||
| 142 | struct kvec iov = { | ||
| 143 | .iov_base = xdr->tail[0].iov_base + base, | ||
| 144 | .iov_len = len - base, | ||
| 145 | }; | ||
| 146 | struct msghdr msg = { | ||
| 147 | .msg_flags = XS_SENDMSG_FLAGS, | ||
| 148 | }; | ||
| 149 | |||
| 150 | return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
| 151 | } | ||
| 152 | |||
| 153 | /** | ||
| 154 | * xs_sendpages - write pages directly to a socket | ||
| 155 | * @sock: socket to send on | ||
| 156 | * @addr: UDP only -- address of destination | ||
| 157 | * @addrlen: UDP only -- length of destination address | ||
| 158 | * @xdr: buffer containing this request | ||
| 159 | * @base: starting position in the buffer | ||
| 160 | * | ||
| 161 | */ | ||
| 162 | static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) | ||
| 163 | { | ||
| 164 | struct page **ppage = xdr->pages; | ||
| 165 | unsigned int len, pglen = xdr->page_len; | ||
| 166 | int err, ret = 0; | ||
| 167 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
| 168 | |||
| 169 | if (unlikely(!sock)) | ||
| 170 | return -ENOTCONN; | ||
| 171 | |||
| 172 | clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
| 173 | |||
| 174 | len = xdr->head[0].iov_len; | ||
| 175 | if (base < len || (addr != NULL && base == 0)) { | ||
| 176 | err = xs_send_head(sock, addr, addrlen, xdr, base, len); | ||
| 177 | if (ret == 0) | ||
| 178 | ret = err; | ||
| 179 | else if (err > 0) | ||
| 180 | ret += err; | ||
| 181 | if (err != (len - base)) | ||
| 182 | goto out; | ||
| 183 | base = 0; | ||
| 184 | } else | ||
| 185 | base -= len; | ||
| 186 | |||
| 187 | if (unlikely(pglen == 0)) | ||
| 188 | goto copy_tail; | ||
| 189 | if (unlikely(base >= pglen)) { | ||
| 190 | base -= pglen; | ||
| 191 | goto copy_tail; | ||
| 192 | } | ||
| 193 | if (base || xdr->page_base) { | ||
| 194 | pglen -= base; | ||
| 195 | base += xdr->page_base; | ||
| 196 | ppage += base >> PAGE_CACHE_SHIFT; | ||
| 197 | base &= ~PAGE_CACHE_MASK; | ||
| 198 | } | ||
| 199 | |||
| 200 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
| 201 | do { | ||
| 202 | int flags = XS_SENDMSG_FLAGS; | ||
| 203 | |||
| 204 | len = PAGE_CACHE_SIZE; | ||
| 205 | if (base) | ||
| 206 | len -= base; | ||
| 207 | if (pglen < len) | ||
| 208 | len = pglen; | ||
| 209 | |||
| 210 | if (pglen != len || xdr->tail[0].iov_len != 0) | ||
| 211 | flags |= MSG_MORE; | ||
| 212 | |||
| 213 | /* Hmm... We might be dealing with highmem pages */ | ||
| 214 | if (PageHighMem(*ppage)) | ||
| 215 | sendpage = sock_no_sendpage; | ||
| 216 | err = sendpage(sock, *ppage, base, len, flags); | ||
| 217 | if (ret == 0) | ||
| 218 | ret = err; | ||
| 219 | else if (err > 0) | ||
| 220 | ret += err; | ||
| 221 | if (err != len) | ||
| 222 | goto out; | ||
| 223 | base = 0; | ||
| 224 | ppage++; | ||
| 225 | } while ((pglen -= len) != 0); | ||
| 226 | copy_tail: | ||
| 227 | len = xdr->tail[0].iov_len; | ||
| 228 | if (base < len) { | ||
| 229 | err = xs_send_tail(sock, xdr, base, len); | ||
| 230 | if (ret == 0) | ||
| 231 | ret = err; | ||
| 232 | else if (err > 0) | ||
| 233 | ret += err; | ||
| 234 | } | ||
| 235 | out: | ||
| 236 | return ret; | ||
| 237 | } | ||
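[Editor's note] To summarize the return convention of xs_sendpages (a reading aid derived from the code above, not new behavior):

	/* Returns the total number of bytes written so far, or a negative
	 * errno if nothing was written.  A positive return smaller than
	 * (xdr->len - base) means the socket buffer filled mid-send; the
	 * caller retries later from its updated offset. */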
| 238 | |||
| 239 | /** | ||
| 240 | * xs_nospace - place task on wait queue if transmit was incomplete | ||
| 241 | * @task: task to put to sleep | ||
| 242 | * | ||
| 243 | */ | ||
| 244 | static void xs_nospace(struct rpc_task *task) | ||
| 245 | { | ||
| 246 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 247 | struct rpc_xprt *xprt = req->rq_xprt; | ||
| 248 | |||
| 249 | dprintk("RPC: %4d xmit incomplete (%u left of %u)\n", | ||
| 250 | task->tk_pid, req->rq_slen - req->rq_bytes_sent, | ||
| 251 | req->rq_slen); | ||
| 252 | |||
| 253 | if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { | ||
| 254 | /* Protect against races with write_space */ | ||
| 255 | spin_lock_bh(&xprt->transport_lock); | ||
| 256 | |||
| 257 | /* Don't race with disconnect */ | ||
| 258 | if (!xprt_connected(xprt)) | ||
| 259 | task->tk_status = -ENOTCONN; | ||
| 260 | else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) | ||
| 261 | xprt_wait_for_buffer_space(task); | ||
| 262 | |||
| 263 | spin_unlock_bh(&xprt->transport_lock); | ||
| 264 | } else | ||
| 265 | /* Keep holding the socket if it is blocked */ | ||
| 266 | rpc_delay(task, HZ>>4); | ||
| 267 | } | ||
| 268 | |||
| 269 | /** | ||
| 270 | * xs_udp_send_request - write an RPC request to a UDP socket | ||
| 271 | * @task: address of RPC task that manages the state of an RPC request | ||
| 272 | * | ||
| 273 | * Return values: | ||
| 274 | * 0: The request has been sent | ||
| 275 | * EAGAIN: The socket was blocked, please call again later to | ||
| 276 | * complete the request | ||
| 277 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
| 278 | * other: Some other error occurred, the request was not sent | ||
| 279 | */ | ||
| 280 | static int xs_udp_send_request(struct rpc_task *task) | ||
| 281 | { | ||
| 282 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 283 | struct rpc_xprt *xprt = req->rq_xprt; | ||
| 284 | struct xdr_buf *xdr = &req->rq_snd_buf; | ||
| 285 | int status; | ||
| 286 | |||
| 287 | xs_pktdump("packet data:", | ||
| 288 | req->rq_svec->iov_base, | ||
| 289 | req->rq_svec->iov_len); | ||
| 290 | |||
| 291 | req->rq_xtime = jiffies; | ||
| 292 | status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, | ||
| 293 | sizeof(xprt->addr), xdr, req->rq_bytes_sent); | ||
| 294 | |||
| 295 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", | ||
| 296 | xdr->len - req->rq_bytes_sent, status); | ||
| 297 | |||
| 298 | if (likely(status >= (int) req->rq_slen)) | ||
| 299 | return 0; | ||
| 300 | |||
| 301 | /* Still some bytes left; set up for a retry later. */ | ||
| 302 | if (status > 0) | ||
| 303 | status = -EAGAIN; | ||
| 304 | |||
| 305 | switch (status) { | ||
| 306 | case -ENETUNREACH: | ||
| 307 | case -EPIPE: | ||
| 308 | case -ECONNREFUSED: | ||
| 309 | /* When the server has died, an ICMP port unreachable message | ||
| 310 | * prompts ECONNREFUSED. */ | ||
| 311 | break; | ||
| 312 | case -EAGAIN: | ||
| 313 | xs_nospace(task); | ||
| 314 | break; | ||
| 315 | default: | ||
| 316 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
| 317 | -status); | ||
| 318 | break; | ||
| 319 | } | ||
| 320 | |||
| 321 | return status; | ||
| 322 | } | ||
| 323 | |||
| 324 | static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) | ||
| 325 | { | ||
| 326 | u32 reclen = buf->len - sizeof(rpc_fraghdr); | ||
| 327 | rpc_fraghdr *base = buf->head[0].iov_base; | ||
| 328 | *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); | ||
| 329 | } | ||
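[Editor's note] A worked example of the marker, with an assumed buffer size:

	/* Example (assumed sizes): buf->len == 128
	 *   reclen = 128 - 4 = 124 (0x7c)
	 *   marker = htonl(RPC_LAST_STREAM_FRAGMENT | 0x7c)
	 *          = wire bytes 80 00 00 7c (last fragment, 124 bytes)
	 */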
| 330 | |||
| 331 | /** | ||
| 332 | * xs_tcp_send_request - write an RPC request to a TCP socket | ||
| 333 | * @task: address of RPC task that manages the state of an RPC request | ||
| 334 | * | ||
| 335 | * Return values: | ||
| 336 | * 0: The request has been sent | ||
| 337 | * EAGAIN: The socket was blocked, please call again later to | ||
| 338 | * complete the request | ||
| 339 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
| 340 | * other: Some other error occurred, the request was not sent | ||
| 341 | * | ||
| 342 | * XXX: In the case of soft timeouts, should we eventually give up | ||
| 343 | * if sendmsg is not able to make progress? | ||
| 344 | */ | ||
| 345 | static int xs_tcp_send_request(struct rpc_task *task) | ||
| 346 | { | ||
| 347 | struct rpc_rqst *req = task->tk_rqstp; | ||
| 348 | struct rpc_xprt *xprt = req->rq_xprt; | ||
| 349 | struct xdr_buf *xdr = &req->rq_snd_buf; | ||
| 350 | int status, retry = 0; | ||
| 351 | |||
| 352 | xs_encode_tcp_record_marker(&req->rq_snd_buf); | ||
| 353 | |||
| 354 | xs_pktdump("packet data:", | ||
| 355 | req->rq_svec->iov_base, | ||
| 356 | req->rq_svec->iov_len); | ||
| 357 | |||
| 358 | /* Continue transmitting the packet/record. We must be careful | ||
| 359 | * to cope with writespace callbacks arriving _after_ we have | ||
| 360 | * called sendmsg(). */ | ||
| 361 | while (1) { | ||
| 362 | req->rq_xtime = jiffies; | ||
| 363 | status = xs_sendpages(xprt->sock, NULL, 0, xdr, | ||
| 364 | req->rq_bytes_sent); | ||
| 365 | |||
| 366 | dprintk("RPC: xs_tcp_send_request(%u) = %d\n", | ||
| 367 | xdr->len - req->rq_bytes_sent, status); | ||
| 368 | |||
| 369 | if (unlikely(status < 0)) | ||
| 370 | break; | ||
| 371 | |||
| 372 | /* If we've sent the entire packet, immediately | ||
| 373 | * reset the count of bytes sent. */ | ||
| 374 | req->rq_bytes_sent += status; | ||
| 375 | if (likely(req->rq_bytes_sent >= req->rq_slen)) { | ||
| 376 | req->rq_bytes_sent = 0; | ||
| 377 | return 0; | ||
| 378 | } | ||
| 379 | |||
| 380 | status = -EAGAIN; | ||
| 381 | if (retry++ > XS_SENDMSG_RETRY) | ||
| 382 | break; | ||
| 383 | } | ||
| 384 | |||
| 385 | switch (status) { | ||
| 386 | case -EAGAIN: | ||
| 387 | xs_nospace(task); | ||
| 388 | break; | ||
| 389 | case -ECONNREFUSED: | ||
| 390 | case -ECONNRESET: | ||
| 391 | case -ENOTCONN: | ||
| 392 | case -EPIPE: | ||
| 393 | status = -ENOTCONN; | ||
| 394 | break; | ||
| 395 | default: | ||
| 396 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
| 397 | -status); | ||
| 398 | xprt_disconnect(xprt); | ||
| 399 | break; | ||
| 400 | } | ||
| 401 | |||
| 402 | return status; | ||
| 403 | } | ||
| 404 | |||
| 405 | /** | ||
| 406 | * xs_close - close a socket | ||
| 407 | * @xprt: transport | ||
| 408 | * | ||
| 409 | * This is used when all requests are complete; i.e., no DRC state | ||
| 410 | * that we want to preserve remains on the server. | ||
| 411 | */ | ||
| 412 | static void xs_close(struct rpc_xprt *xprt) | ||
| 413 | { | ||
| 414 | struct socket *sock = xprt->sock; | ||
| 415 | struct sock *sk = xprt->inet; | ||
| 416 | |||
| 417 | if (!sk) | ||
| 418 | return; | ||
| 419 | |||
| 420 | dprintk("RPC: xs_close xprt %p\n", xprt); | ||
| 421 | |||
| 422 | write_lock_bh(&sk->sk_callback_lock); | ||
| 423 | xprt->inet = NULL; | ||
| 424 | xprt->sock = NULL; | ||
| 425 | |||
| 426 | sk->sk_user_data = NULL; | ||
| 427 | sk->sk_data_ready = xprt->old_data_ready; | ||
| 428 | sk->sk_state_change = xprt->old_state_change; | ||
| 429 | sk->sk_write_space = xprt->old_write_space; | ||
| 430 | write_unlock_bh(&sk->sk_callback_lock); | ||
| 431 | |||
| 432 | sk->sk_no_check = 0; | ||
| 433 | |||
| 434 | sock_release(sock); | ||
| 435 | } | ||
| 436 | |||
| 437 | /** | ||
| 438 | * xs_destroy - prepare to shutdown a transport | ||
| 439 | * @xprt: doomed transport | ||
| 440 | * | ||
| 441 | */ | ||
| 442 | static void xs_destroy(struct rpc_xprt *xprt) | ||
| 443 | { | ||
| 444 | dprintk("RPC: xs_destroy xprt %p\n", xprt); | ||
| 445 | |||
| 446 | cancel_delayed_work(&xprt->connect_worker); | ||
| 447 | flush_scheduled_work(); | ||
| 448 | |||
| 449 | xprt_disconnect(xprt); | ||
| 450 | xs_close(xprt); | ||
| 451 | kfree(xprt->slot); | ||
| 452 | } | ||
| 453 | |||
| 454 | static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) | ||
| 455 | { | ||
| 456 | return (struct rpc_xprt *) sk->sk_user_data; | ||
| 457 | } | ||
| 458 | |||
| 459 | /** | ||
| 460 | * xs_udp_data_ready - "data ready" callback for UDP sockets | ||
| 461 | * @sk: socket with data to read | ||
| 462 | * @len: how much data to read | ||
| 463 | * | ||
| 464 | */ | ||
| 465 | static void xs_udp_data_ready(struct sock *sk, int len) | ||
| 466 | { | ||
| 467 | struct rpc_task *task; | ||
| 468 | struct rpc_xprt *xprt; | ||
| 469 | struct rpc_rqst *rovr; | ||
| 470 | struct sk_buff *skb; | ||
| 471 | int err, repsize, copied; | ||
| 472 | u32 _xid, *xp; | ||
| 473 | |||
| 474 | read_lock(&sk->sk_callback_lock); | ||
| 475 | dprintk("RPC: xs_udp_data_ready...\n"); | ||
| 476 | if (!(xprt = xprt_from_sock(sk))) | ||
| 477 | goto out; | ||
| 478 | |||
| 479 | if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) | ||
| 480 | goto out; | ||
| 481 | |||
| 482 | if (xprt->shutdown) | ||
| 483 | goto dropit; | ||
| 484 | |||
| 485 | repsize = skb->len - sizeof(struct udphdr); | ||
| 486 | if (repsize < 4) { | ||
| 487 | dprintk("RPC: impossible RPC reply size %d!\n", repsize); | ||
| 488 | goto dropit; | ||
| 489 | } | ||
| 490 | |||
| 491 | /* Copy the XID from the skb... */ | ||
| 492 | xp = skb_header_pointer(skb, sizeof(struct udphdr), | ||
| 493 | sizeof(_xid), &_xid); | ||
| 494 | if (xp == NULL) | ||
| 495 | goto dropit; | ||
| 496 | |||
| 497 | /* Look up and lock the request corresponding to the given XID */ | ||
| 498 | spin_lock(&xprt->transport_lock); | ||
| 499 | rovr = xprt_lookup_rqst(xprt, *xp); | ||
| 500 | if (!rovr) | ||
| 501 | goto out_unlock; | ||
| 502 | task = rovr->rq_task; | ||
| 503 | |||
| 504 | if ((copied = rovr->rq_private_buf.buflen) > repsize) | ||
| 505 | copied = repsize; | ||
| 506 | |||
| 507 | /* Suck it into the iovec, verify checksum if not done by hw. */ | ||
| 508 | if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) | ||
| 509 | goto out_unlock; | ||
| 510 | |||
| 511 | /* Something worked... */ | ||
| 512 | dst_confirm(skb->dst); | ||
| 513 | |||
| 514 | xprt_adjust_cwnd(task, copied); | ||
| 515 | xprt_update_rtt(task); | ||
| 516 | xprt_complete_rqst(task, copied); | ||
| 517 | |||
| 518 | out_unlock: | ||
| 519 | spin_unlock(&xprt->transport_lock); | ||
| 520 | dropit: | ||
| 521 | skb_free_datagram(sk, skb); | ||
| 522 | out: | ||
| 523 | read_unlock(&sk->sk_callback_lock); | ||
| 524 | } | ||
| 525 | |||
| 526 | static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) | ||
| 527 | { | ||
| 528 | if (len > desc->count) | ||
| 529 | len = desc->count; | ||
| 530 | if (skb_copy_bits(desc->skb, desc->offset, p, len)) { | ||
| 531 | dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", | ||
| 532 | len, desc->count); | ||
| 533 | return 0; | ||
| 534 | } | ||
| 535 | desc->offset += len; | ||
| 536 | desc->count -= len; | ||
| 537 | dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", | ||
| 538 | len, desc->count); | ||
| 539 | return len; | ||
| 540 | } | ||
| 541 | |||
| 542 | static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
| 543 | { | ||
| 544 | size_t len, used; | ||
| 545 | char *p; | ||
| 546 | |||
| 547 | p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; | ||
| 548 | len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; | ||
| 549 | used = xs_tcp_copy_data(desc, p, len); | ||
| 550 | xprt->tcp_offset += used; | ||
| 551 | if (used != len) | ||
| 552 | return; | ||
| 553 | |||
| 554 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | ||
| 555 | if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) | ||
| 556 | xprt->tcp_flags |= XPRT_LAST_FRAG; | ||
| 557 | else | ||
| 558 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | ||
| 559 | xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; | ||
| 560 | |||
| 561 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | ||
| 562 | xprt->tcp_offset = 0; | ||
| 563 | |||
| 564 | /* Sanity check of the record length */ | ||
| 565 | if (unlikely(xprt->tcp_reclen < 4)) { | ||
| 566 | dprintk("RPC: invalid TCP record fragment length\n"); | ||
| 567 | xprt_disconnect(xprt); | ||
| 568 | return; | ||
| 569 | } | ||
| 570 | dprintk("RPC: reading TCP record fragment of length %d\n", | ||
| 571 | xprt->tcp_reclen); | ||
| 572 | } | ||
| 573 | |||
| 574 | static void xs_tcp_check_recm(struct rpc_xprt *xprt) | ||
| 575 | { | ||
| 576 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", | ||
| 577 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); | ||
| 578 | if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
| 579 | xprt->tcp_flags |= XPRT_COPY_RECM; | ||
| 580 | xprt->tcp_offset = 0; | ||
| 581 | if (xprt->tcp_flags & XPRT_LAST_FRAG) { | ||
| 582 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 583 | xprt->tcp_flags |= XPRT_COPY_XID; | ||
| 584 | xprt->tcp_copied = 0; | ||
| 585 | } | ||
| 586 | } | ||
| 587 | } | ||
| 588 | |||
| 589 | static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
| 590 | { | ||
| 591 | size_t len, used; | ||
| 592 | char *p; | ||
| 593 | |||
| 594 | len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; | ||
| 595 | dprintk("RPC: reading XID (%Zu bytes)\n", len); | ||
| 596 | p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; | ||
| 597 | used = xs_tcp_copy_data(desc, p, len); | ||
| 598 | xprt->tcp_offset += used; | ||
| 599 | if (used != len) | ||
| 600 | return; | ||
| 601 | xprt->tcp_flags &= ~XPRT_COPY_XID; | ||
| 602 | xprt->tcp_flags |= XPRT_COPY_DATA; | ||
| 603 | xprt->tcp_copied = 4; | ||
| 604 | dprintk("RPC: reading reply for XID %08x\n", | ||
| 605 | ntohl(xprt->tcp_xid)); | ||
| 606 | xs_tcp_check_recm(xprt); | ||
| 607 | } | ||
| 608 | |||
| 609 | static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
| 610 | { | ||
| 611 | struct rpc_rqst *req; | ||
| 612 | struct xdr_buf *rcvbuf; | ||
| 613 | size_t len; | ||
| 614 | ssize_t r; | ||
| 615 | |||
| 616 | /* Find and lock the request corresponding to this xid */ | ||
| 617 | spin_lock(&xprt->transport_lock); | ||
| 618 | req = xprt_lookup_rqst(xprt, xprt->tcp_xid); | ||
| 619 | if (!req) { | ||
| 620 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 621 | dprintk("RPC: XID %08x request not found!\n", | ||
| 622 | ntohl(xprt->tcp_xid)); | ||
| 623 | spin_unlock(&xprt->transport_lock); | ||
| 624 | return; | ||
| 625 | } | ||
| 626 | |||
| 627 | rcvbuf = &req->rq_private_buf; | ||
| 628 | len = desc->count; | ||
| 629 | if (len > xprt->tcp_reclen - xprt->tcp_offset) { | ||
| 630 | skb_reader_t my_desc; | ||
| 631 | |||
| 632 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
| 633 | memcpy(&my_desc, desc, sizeof(my_desc)); | ||
| 634 | my_desc.count = len; | ||
| 635 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
| 636 | &my_desc, xs_tcp_copy_data); | ||
| 637 | desc->count -= r; | ||
| 638 | desc->offset += r; | ||
| 639 | } else | ||
| 640 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
| 641 | desc, xs_tcp_copy_data); | ||
| 642 | |||
| 643 | if (r > 0) { | ||
| 644 | xprt->tcp_copied += r; | ||
| 645 | xprt->tcp_offset += r; | ||
| 646 | } | ||
| 647 | if (r != len) { | ||
| 648 | /* Error when copying to the receive buffer, | ||
| 649 | * usually because we weren't able to allocate | ||
| 650 | * additional buffer pages. All we can do now | ||
| 651 | * is turn off XPRT_COPY_DATA, so the request | ||
| 652 | * will not receive any additional updates, | ||
| 653 | * and time out. | ||
| 654 | * Any remaining data from this record will | ||
| 655 | * be discarded. | ||
| 656 | */ | ||
| 657 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 658 | dprintk("RPC: XID %08x truncated request\n", | ||
| 659 | ntohl(xprt->tcp_xid)); | ||
| 660 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
| 661 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
| 662 | goto out; | ||
| 663 | } | ||
| 664 | |||
| 665 | dprintk("RPC: XID %08x read %Zd bytes\n", | ||
| 666 | ntohl(xprt->tcp_xid), r); | ||
| 667 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
| 668 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
| 669 | |||
| 670 | if (xprt->tcp_copied == req->rq_private_buf.buflen) | ||
| 671 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 672 | else if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
| 673 | if (xprt->tcp_flags & XPRT_LAST_FRAG) | ||
| 674 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
| 675 | } | ||
| 676 | |||
| 677 | out: | ||
| 678 | if (!(xprt->tcp_flags & XPRT_COPY_DATA)) | ||
| 679 | xprt_complete_rqst(req->rq_task, xprt->tcp_copied); | ||
| 680 | spin_unlock(&xprt->transport_lock); | ||
| 681 | xs_tcp_check_recm(xprt); | ||
| 682 | } | ||
| 683 | |||
| 684 | static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
| 685 | { | ||
| 686 | size_t len; | ||
| 687 | |||
| 688 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
| 689 | if (len > desc->count) | ||
| 690 | len = desc->count; | ||
| 691 | desc->count -= len; | ||
| 692 | desc->offset += len; | ||
| 693 | xprt->tcp_offset += len; | ||
| 694 | dprintk("RPC: discarded %Zu bytes\n", len); | ||
| 695 | xs_tcp_check_recm(xprt); | ||
| 696 | } | ||
| 697 | |||
| 698 | static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) | ||
| 699 | { | ||
| 700 | struct rpc_xprt *xprt = rd_desc->arg.data; | ||
| 701 | skb_reader_t desc = { | ||
| 702 | .skb = skb, | ||
| 703 | .offset = offset, | ||
| 704 | .count = len, | ||
| 705 | .csum = 0 | ||
| 706 | }; | ||
| 707 | |||
| 708 | dprintk("RPC: xs_tcp_data_recv started\n"); | ||
| 709 | do { | ||
| 710 | /* Read in a new fragment marker if necessary */ | ||
| 711 | /* Can we ever really expect to get completely empty fragments? */ | ||
| 712 | if (xprt->tcp_flags & XPRT_COPY_RECM) { | ||
| 713 | xs_tcp_read_fraghdr(xprt, &desc); | ||
| 714 | continue; | ||
| 715 | } | ||
| 716 | /* Read in the xid if necessary */ | ||
| 717 | if (xprt->tcp_flags & XPRT_COPY_XID) { | ||
| 718 | xs_tcp_read_xid(xprt, &desc); | ||
| 719 | continue; | ||
| 720 | } | ||
| 721 | /* Read in the request data */ | ||
| 722 | if (xprt->tcp_flags & XPRT_COPY_DATA) { | ||
| 723 | xs_tcp_read_request(xprt, &desc); | ||
| 724 | continue; | ||
| 725 | } | ||
| 726 | /* Skip over any trailing bytes on short reads */ | ||
| 727 | xs_tcp_read_discard(xprt, &desc); | ||
| 728 | } while (desc.count); | ||
| 729 | dprintk("RPC: xs_tcp_data_recv done\n"); | ||
| 730 | return len - desc.count; | ||
| 731 | } | ||
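[Editor's note] The loop above is a small state machine driven by xprt->tcp_flags; a summary of the flow it implements (derived from the code, not an addition to it):

	/* XPRT_COPY_RECM -> read the 4-byte record marker (xs_tcp_read_fraghdr)
	 * XPRT_COPY_XID  -> read the 4-byte XID            (xs_tcp_read_xid)
	 * XPRT_COPY_DATA -> copy payload into the request  (xs_tcp_read_request)
	 * none of these  -> discard the rest of the record (xs_tcp_read_discard)
	 */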
| 732 | |||
| 733 | /** | ||
| 734 | * xs_tcp_data_ready - "data ready" callback for TCP sockets | ||
| 735 | * @sk: socket with data to read | ||
| 736 | * @bytes: how much data to read | ||
| 737 | * | ||
| 738 | */ | ||
| 739 | static void xs_tcp_data_ready(struct sock *sk, int bytes) | ||
| 740 | { | ||
| 741 | struct rpc_xprt *xprt; | ||
| 742 | read_descriptor_t rd_desc; | ||
| 743 | |||
| 744 | read_lock(&sk->sk_callback_lock); | ||
| 745 | dprintk("RPC: xs_tcp_data_ready...\n"); | ||
| 746 | if (!(xprt = xprt_from_sock(sk))) | ||
| 747 | goto out; | ||
| 748 | if (xprt->shutdown) | ||
| 749 | goto out; | ||
| 750 | |||
| 751 | /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ | ||
| 752 | rd_desc.arg.data = xprt; | ||
| 753 | rd_desc.count = 65536; | ||
| 754 | tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); | ||
| 755 | out: | ||
| 756 | read_unlock(&sk->sk_callback_lock); | ||
| 757 | } | ||
| 758 | |||
| 759 | /** | ||
| 760 | * xs_tcp_state_change - callback to handle TCP socket state changes | ||
| 761 | * @sk: socket whose state has changed | ||
| 762 | * | ||
| 763 | */ | ||
| 764 | static void xs_tcp_state_change(struct sock *sk) | ||
| 765 | { | ||
| 766 | struct rpc_xprt *xprt; | ||
| 767 | |||
| 768 | read_lock(&sk->sk_callback_lock); | ||
| 769 | if (!(xprt = xprt_from_sock(sk))) | ||
| 770 | goto out; | ||
| 771 | dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); | ||
| 772 | dprintk("RPC: state %x conn %d dead %d zapped %d\n", | ||
| 773 | sk->sk_state, xprt_connected(xprt), | ||
| 774 | sock_flag(sk, SOCK_DEAD), | ||
| 775 | sock_flag(sk, SOCK_ZAPPED)); | ||
| 776 | |||
| 777 | switch (sk->sk_state) { | ||
| 778 | case TCP_ESTABLISHED: | ||
| 779 | spin_lock_bh(&xprt->transport_lock); | ||
| 780 | if (!xprt_test_and_set_connected(xprt)) { | ||
| 781 | /* Reset TCP record info */ | ||
| 782 | xprt->tcp_offset = 0; | ||
| 783 | xprt->tcp_reclen = 0; | ||
| 784 | xprt->tcp_copied = 0; | ||
| 785 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | ||
| 786 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
| 787 | xprt_wake_pending_tasks(xprt, 0); | ||
| 788 | } | ||
| 789 | spin_unlock_bh(&xprt->transport_lock); | ||
| 790 | break; | ||
| 791 | case TCP_SYN_SENT: | ||
| 792 | case TCP_SYN_RECV: | ||
| 793 | break; | ||
| 794 | default: | ||
| 795 | xprt_disconnect(xprt); | ||
| 796 | break; | ||
| 797 | } | ||
| 798 | out: | ||
| 799 | read_unlock(&sk->sk_callback_lock); | ||
| 800 | } | ||
| 801 | |||
| 802 | /** | ||
| 803 | * xs_udp_write_space - callback invoked when socket buffer space | ||
| 804 | * becomes available | ||
| 805 | * @sk: socket whose state has changed | ||
| 806 | * | ||
| 807 | * Called when more output buffer space is available for this socket. | ||
| 808 | * We try not to wake our writers until they can make "significant" | ||
| 809 | * progress; otherwise we'll waste resources thrashing kernel_sendmsg | ||
| 810 | * with a bunch of small requests. | ||
| 811 | */ | ||
| 812 | static void xs_udp_write_space(struct sock *sk) | ||
| 813 | { | ||
| 814 | read_lock(&sk->sk_callback_lock); | ||
| 815 | |||
| 816 | /* from net/core/sock.c:sock_def_write_space */ | ||
| 817 | if (sock_writeable(sk)) { | ||
| 818 | struct socket *sock; | ||
| 819 | struct rpc_xprt *xprt; | ||
| 820 | |||
| 821 | if (unlikely(!(sock = sk->sk_socket))) | ||
| 822 | goto out; | ||
| 823 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | ||
| 824 | goto out; | ||
| 825 | if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) | ||
| 826 | goto out; | ||
| 827 | |||
| 828 | xprt_write_space(xprt); | ||
| 829 | } | ||
| 830 | |||
| 831 | out: | ||
| 832 | read_unlock(&sk->sk_callback_lock); | ||
| 833 | } | ||
| 834 | |||
| 835 | /** | ||
| 836 | * xs_tcp_write_space - callback invoked when socket buffer space | ||
| 837 | * becomes available | ||
| 838 | * @sk: socket whose state has changed | ||
| 839 | * | ||
| 840 | * Called when more output buffer space is available for this socket. | ||
| 841 | * We try not to wake our writers until they can make "significant" | ||
| 842 | * progress; otherwise we'll waste resources thrashing kernel_sendmsg | ||
| 843 | * with a bunch of small requests. | ||
| 844 | */ | ||
| 845 | static void xs_tcp_write_space(struct sock *sk) | ||
| 846 | { | ||
| 847 | read_lock(&sk->sk_callback_lock); | ||
| 848 | |||
| 849 | /* from net/core/stream.c:sk_stream_write_space */ | ||
| 850 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { | ||
| 851 | struct socket *sock; | ||
| 852 | struct rpc_xprt *xprt; | ||
| 853 | |||
| 854 | if (unlikely(!(sock = sk->sk_socket))) | ||
| 855 | goto out; | ||
| 856 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | ||
| 857 | goto out; | ||
| 858 | if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) | ||
| 859 | goto out; | ||
| 860 | |||
| 861 | xprt_write_space(xprt); | ||
| 862 | } | ||
| 863 | |||
| 864 | out: | ||
| 865 | read_unlock(&sk->sk_callback_lock); | ||
| 866 | } | ||
| 867 | |||
| 868 | static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) | ||
| 869 | { | ||
| 870 | struct sock *sk = xprt->inet; | ||
| 871 | |||
| 872 | if (xprt->rcvsize) { | ||
| 873 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
| 874 | sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; | ||
| 875 | } | ||
| 876 | if (xprt->sndsize) { | ||
| 877 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
| 878 | sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; | ||
| 879 | sk->sk_write_space(sk); | ||
| 880 | } | ||
| 881 | } | ||
| 882 | |||
| 883 | /** | ||
| 884 | * xs_udp_set_buffer_size - set send and receive limits | ||
| 885 | * @xprt: generic transport | ||
| 886 | * @sndsize: requested size of send buffer, in bytes | ||
| 887 | * @rcvsize: requested size of receive buffer, in bytes | ||
| 888 | * | ||
| 889 | * Set socket send and receive buffer size limits. | ||
| 890 | */ | ||
| 891 | static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) | ||
| 892 | { | ||
| 893 | xprt->sndsize = 0; | ||
| 894 | if (sndsize) | ||
| 895 | xprt->sndsize = sndsize + 1024; | ||
| 896 | xprt->rcvsize = 0; | ||
| 897 | if (rcvsize) | ||
| 898 | xprt->rcvsize = rcvsize + 1024; | ||
| 899 | |||
| 900 | xs_udp_do_set_buffer_size(xprt); | ||
| 901 | } | ||
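[Editor's note] Worked numbers for the buffer-size arithmetic; the 16-slot table is an assumption, not something defined in this hunk:

	/* Hypothetical: xs_udp_set_buffer_size(xprt, 32768, 0) with an
	 * assumed 16-slot table gives
	 *   xprt->sndsize = 32768 + 1024 = 33792
	 *   sk->sk_sndbuf = 33792 * 16 * 2 = 1081344 bytes
	 */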
| 902 | |||
| 903 | /** | ||
| 904 | * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport | ||
| 905 | * @task: task that timed out | ||
| 906 | * | ||
| 907 | * Adjust the congestion window after a retransmit timeout has occurred. | ||
| 908 | */ | ||
| 909 | static void xs_udp_timer(struct rpc_task *task) | ||
| 910 | { | ||
| 911 | xprt_adjust_cwnd(task, -ETIMEDOUT); | ||
| 912 | } | ||
| 913 | |||
| 914 | static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | ||
| 915 | { | ||
| 916 | struct sockaddr_in myaddr = { | ||
| 917 | .sin_family = AF_INET, | ||
| 918 | }; | ||
| 919 | int err; | ||
| 920 | unsigned short port = xprt->port; | ||
| 921 | |||
| 922 | do { | ||
| 923 | myaddr.sin_port = htons(port); | ||
| 924 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | ||
| 925 | sizeof(myaddr)); | ||
| 926 | if (err == 0) { | ||
| 927 | xprt->port = port; | ||
| 928 | dprintk("RPC: xs_bindresvport bound to port %u\n", | ||
| 929 | port); | ||
| 930 | return 0; | ||
| 931 | } | ||
| 932 | if (port <= xprt_min_resvport) | ||
| 933 | port = xprt_max_resvport; | ||
| 934 | else | ||
| 935 | port--; | ||
| 936 | } while (err == -EADDRINUSE && port != xprt->port); | ||
| 937 | |||
| 938 | dprintk("RPC: can't bind to reserved port (%d).\n", -err); | ||
| 939 | return err; | ||
| 940 | } | ||
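[Editor's note] The search order the loop implements, with assumed bounds for illustration (665 and 1023 are not defined in this hunk): starting from xprt->port = 800, the bind attempts go 800, 799, ..., 665, wrap to 1023, 1022, ..., 801, and the function gives up with -EADDRINUSE once the next candidate would be 800 again.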
| 941 | |||
| 942 | /** | ||
| 943 | * xs_udp_connect_worker - set up a UDP socket | ||
| 944 | * @args: RPC transport to connect | ||
| 945 | * | ||
| 946 | * Invoked by a work queue item. | ||
| 947 | */ | ||
| 948 | static void xs_udp_connect_worker(void *args) | ||
| 949 | { | ||
| 950 | struct rpc_xprt *xprt = (struct rpc_xprt *) args; | ||
| 951 | struct socket *sock = xprt->sock; | ||
| 952 | int err, status = -EIO; | ||
| 953 | |||
| 954 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
| 955 | goto out; | ||
| 956 | |||
| 957 | dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); | ||
| 958 | |||
| 959 | /* Start by resetting any existing state */ | ||
| 960 | xs_close(xprt); | ||
| 961 | |||
| 962 | if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { | ||
| 963 | dprintk("RPC: can't create UDP transport socket (%d).\n", -err); | ||
| 964 | goto out; | ||
| 965 | } | ||
| 966 | |||
| 967 | if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { | ||
| 968 | sock_release(sock); | ||
| 969 | goto out; | ||
| 970 | } | ||
| 971 | |||
| 972 | if (!xprt->inet) { | ||
| 973 | struct sock *sk = sock->sk; | ||
| 974 | |||
| 975 | write_lock_bh(&sk->sk_callback_lock); | ||
| 976 | |||
| 977 | sk->sk_user_data = xprt; | ||
| 978 | xprt->old_data_ready = sk->sk_data_ready; | ||
| 979 | xprt->old_state_change = sk->sk_state_change; | ||
| 980 | xprt->old_write_space = sk->sk_write_space; | ||
| 981 | sk->sk_data_ready = xs_udp_data_ready; | ||
| 982 | sk->sk_write_space = xs_udp_write_space; | ||
| 983 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
| 984 | |||
| 985 | xprt_set_connected(xprt); | ||
| 986 | |||
| 987 | /* Reset to new socket */ | ||
| 988 | xprt->sock = sock; | ||
| 989 | xprt->inet = sk; | ||
| 990 | |||
| 991 | write_unlock_bh(&sk->sk_callback_lock); | ||
| 992 | } | ||
| 993 | xs_udp_do_set_buffer_size(xprt); | ||
| 994 | status = 0; | ||
| 995 | out: | ||
| 996 | xprt_wake_pending_tasks(xprt, status); | ||
| 997 | xprt_clear_connecting(xprt); | ||
| 998 | } | ||
| 999 | |||
| 1000 | /* | ||
| 1001 | * We need to preserve the port number so the reply cache on the server can | ||
| 1002 | * find our cached RPC replies when we get around to reconnecting. | ||
| 1003 | */ | ||
| 1004 | static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) | ||
| 1005 | { | ||
| 1006 | int result; | ||
| 1007 | struct socket *sock = xprt->sock; | ||
| 1008 | struct sockaddr any; | ||
| 1009 | |||
| 1010 | dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); | ||
| 1011 | |||
| 1012 | /* | ||
| 1013 | * Disconnect the transport socket by doing a connect operation | ||
| 1014 | * with AF_UNSPEC. This should return immediately... | ||
| 1015 | */ | ||
| 1016 | memset(&any, 0, sizeof(any)); | ||
| 1017 | any.sa_family = AF_UNSPEC; | ||
| 1018 | result = sock->ops->connect(sock, &any, sizeof(any), 0); | ||
| 1019 | if (result) | ||
| 1020 | dprintk("RPC: AF_UNSPEC connect return code %d\n", | ||
| 1021 | result); | ||
| 1022 | } | ||
| 1023 | |||
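Connecting to AF_UNSPEC is the standard way to dissolve a socket's peer association while keeping its local binding: POSIX specifies it for datagram sockets, and Linux also honors it for TCP, which is what xs_tcp_reuse_connection relies on to hold onto the port. A userspace illustration of the same trick:

	#include <string.h>
	#include <sys/socket.h>

	static int dissolve_connection(int fd)
	{
		struct sockaddr any;

		memset(&any, 0, sizeof(any));
		any.sa_family = AF_UNSPEC;
		/* returns immediately; the local address and port stay
		 * bound, so a later connect() reuses the same port */
		return connect(fd, &any, sizeof(any));
	}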
| 1024 | /** | ||
| 1025 | * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint | ||
| 1026 | * @args: RPC transport to connect | ||
| 1027 | * | ||
| 1028 | * Invoked by a work queue task (workqueues run in process context; this is not a tasklet). | ||
| 1029 | */ | ||
| 1030 | static void xs_tcp_connect_worker(void *args) | ||
| 1031 | { | ||
| 1032 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
| 1033 | struct socket *sock = xprt->sock; | ||
| 1034 | int err, status = -EIO; | ||
| 1035 | |||
| 1036 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
| 1037 | goto out; | ||
| 1038 | |||
| 1039 | dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); | ||
| 1040 | |||
| 1041 | if (!xprt->sock) { | ||
| 1042 | /* start from scratch */ | ||
| 1043 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | ||
| 1044 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | ||
| 1045 | goto out; | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { | ||
| 1049 | sock_release(sock); | ||
| 1050 | goto out; | ||
| 1051 | } | ||
| 1052 | } else | ||
| 1053 | /* "close" the socket, preserving the local port */ | ||
| 1054 | xs_tcp_reuse_connection(xprt); | ||
| 1055 | |||
| 1056 | if (!xprt->inet) { | ||
| 1057 | struct sock *sk = sock->sk; | ||
| 1058 | |||
| 1059 | write_lock_bh(&sk->sk_callback_lock); | ||
| 1060 | |||
| 1061 | sk->sk_user_data = xprt; | ||
| 1062 | xprt->old_data_ready = sk->sk_data_ready; | ||
| 1063 | xprt->old_state_change = sk->sk_state_change; | ||
| 1064 | xprt->old_write_space = sk->sk_write_space; | ||
| 1065 | sk->sk_data_ready = xs_tcp_data_ready; | ||
| 1066 | sk->sk_state_change = xs_tcp_state_change; | ||
| 1067 | sk->sk_write_space = xs_tcp_write_space; | ||
| 1068 | |||
| 1069 | /* socket options */ | ||
| 1070 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | ||
| 1071 | sock_reset_flag(sk, SOCK_LINGER); | ||
| 1072 | tcp_sk(sk)->linger2 = 0; | ||
| 1073 | tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; | ||
| 1074 | |||
| 1075 | xprt_clear_connected(xprt); | ||
| 1076 | |||
| 1077 | /* Reset to new socket */ | ||
| 1078 | xprt->sock = sock; | ||
| 1079 | xprt->inet = sk; | ||
| 1080 | |||
| 1081 | write_unlock_bh(&sk->sk_callback_lock); | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | /* Tell the socket layer to start connecting... */ | ||
| 1085 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | ||
| 1086 | sizeof(xprt->addr), O_NONBLOCK); | ||
| 1087 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | ||
| 1088 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | ||
| 1089 | if (status < 0) { | ||
| 1090 | switch (status) { | ||
| 1091 | case -EINPROGRESS: | ||
| 1092 | case -EALREADY: | ||
| 1093 | goto out_clear; | ||
| 1094 | case -ECONNREFUSED: | ||
| 1095 | case -ECONNRESET: | ||
| 1096 | /* retry with existing socket, after a delay */ | ||
| 1097 | break; | ||
| 1098 | default: | ||
| 1099 | /* get rid of existing socket, and retry */ | ||
| 1100 | xs_close(xprt); | ||
| 1101 | break; | ||
| 1102 | } | ||
| 1103 | } | ||
| 1104 | out: | ||
| 1105 | xprt_wake_pending_tasks(xprt, status); | ||
| 1106 | out_clear: | ||
| 1107 | xprt_clear_connecting(xprt); | ||
| 1108 | } | ||
| 1109 | |||
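The switch on the connect status distinguishes three cases: the handshake is still in flight (EINPROGRESS/EALREADY), the peer actively rejected us so the same socket can be retried after a delay (ECONNREFUSED/ECONNRESET), and everything else, where the socket is discarded and recreated. A userspace analogue of the same nonblocking-connect triage:

	#include <errno.h>
	#include <fcntl.h>
	#include <sys/socket.h>

	static int start_connect(int fd, const struct sockaddr *addr,
				 socklen_t len)
	{
		fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);

		if (connect(fd, addr, len) == 0)
			return 0;	/* connected immediately */

		switch (errno) {
		case EINPROGRESS:
		case EALREADY:
			return 1;	/* in flight; poll for completion */
		case ECONNREFUSED:
		case ECONNRESET:
			return -1;	/* rejected; retry, same socket */
		default:
			return -2;	/* unusable; close and recreate */
		}
	}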
| 1110 | /** | ||
| 1111 | * xs_connect - connect a socket to a remote endpoint | ||
| 1112 | * @task: address of RPC task that manages state of connect request | ||
| 1113 | * | ||
| 1114 | * TCP: If the remote end dropped the connection, delay reconnecting. | ||
| 1115 | * | ||
| 1116 | * UDP socket connects are synchronous, but we use a work queue anyway | ||
| 1117 | * to guarantee that even unprivileged user processes can set up a | ||
| 1118 | * socket on a privileged port. | ||
| 1119 | * | ||
| 1120 | * If a UDP socket connect fails, the delay behavior here prevents | ||
| 1121 | * retry floods (hard mounts). | ||
| 1122 | */ | ||
| 1123 | static void xs_connect(struct rpc_task *task) | ||
| 1124 | { | ||
| 1125 | struct rpc_xprt *xprt = task->tk_xprt; | ||
| 1126 | |||
| 1127 | if (xprt_test_and_set_connecting(xprt)) | ||
| 1128 | return; | ||
| 1129 | |||
| 1130 | if (xprt->sock != NULL) { | ||
| 1131 | dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", | ||
| 1132 | xprt, xprt->reestablish_timeout / HZ); | ||
| 1133 | schedule_delayed_work(&xprt->connect_worker, | ||
| 1134 | xprt->reestablish_timeout); | ||
| 1135 | xprt->reestablish_timeout <<= 1; | ||
| 1136 | if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) | ||
| 1137 | xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; | ||
| 1138 | } else { | ||
| 1139 | dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); | ||
| 1140 | schedule_work(&xprt->connect_worker); | ||
| 1141 | |||
| 1142 | /* flush_scheduled_work can sleep... */ | ||
| 1143 | if (!RPC_IS_ASYNC(task)) | ||
| 1144 | flush_scheduled_work(); | ||
| 1145 | } | ||
| 1146 | } | ||
| 1147 | |||
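On the reconnect path the delay doubles with every attempt and is clamped at XS_TCP_MAX_REEST_TO, giving classic exponential backoff. A standalone sketch of that computation; HZ and the cap value below are assumed stand-ins for the kernel's constants:

	#define HZ			1000UL		/* assumed tick rate */
	#define MAX_REEST_TIMEOUT	(5UL * 60 * HZ)	/* assumed cap */

	static unsigned long next_reestablish_timeout(unsigned long cur)
	{
		unsigned long next = cur << 1;	/* double per attempt */

		if (next > MAX_REEST_TIMEOUT)
			next = MAX_REEST_TIMEOUT;
		return next;
	}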
| 1148 | static struct rpc_xprt_ops xs_udp_ops = { | ||
| 1149 | .set_buffer_size = xs_udp_set_buffer_size, | ||
| 1150 | .reserve_xprt = xprt_reserve_xprt_cong, | ||
| 1151 | .release_xprt = xprt_release_xprt_cong, | ||
| 1152 | .connect = xs_connect, | ||
| 1153 | .send_request = xs_udp_send_request, | ||
| 1154 | .set_retrans_timeout = xprt_set_retrans_timeout_rtt, | ||
| 1155 | .timer = xs_udp_timer, | ||
| 1156 | .release_request = xprt_release_rqst_cong, | ||
| 1157 | .close = xs_close, | ||
| 1158 | .destroy = xs_destroy, | ||
| 1159 | }; | ||
| 1160 | |||
| 1161 | static struct rpc_xprt_ops xs_tcp_ops = { | ||
| 1162 | .reserve_xprt = xprt_reserve_xprt, | ||
| 1163 | .release_xprt = xprt_release_xprt, | ||
| 1164 | .connect = xs_connect, | ||
| 1165 | .send_request = xs_tcp_send_request, | ||
| 1166 | .set_retrans_timeout = xprt_set_retrans_timeout_def, | ||
| 1167 | .close = xs_close, | ||
| 1168 | .destroy = xs_destroy, | ||
| 1169 | }; | ||
| 1170 | |||
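These two method tables are what keep the generic RPC code protocol-agnostic: callers invoke xprt->ops->send_request() and friends without knowing whether UDP or TCP backs the transport, and the congestion-aware variants (reserve_xprt_cong, release_xprt_cong, release_rqst_cong) are wired up only for UDP. A self-contained sketch of the dispatch pattern, with illustrative types rather than the kernel's:

	struct xprt;

	struct xprt_ops {
		void (*connect)(struct xprt *xprt);
		int  (*send_request)(struct xprt *xprt);
		void (*close)(struct xprt *xprt);
	};

	struct xprt {
		const struct xprt_ops *ops;	/* set once at setup time */
	};

	/* generic code calls through the table; UDP and TCP each
	 * supply their own entries, as xs_udp_ops/xs_tcp_ops do */
	static int xprt_transmit(struct xprt *xprt)
	{
		return xprt->ops->send_request(xprt);
	}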
| 1171 | /** | ||
| 1172 | * xs_setup_udp - Set up transport to use a UDP socket | ||
| 1173 | * @xprt: transport to set up | ||
| 1174 | * @to: timeout parameters | ||
| 1175 | * | ||
| 1176 | */ | ||
| 1177 | int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | ||
| 1178 | { | ||
| 1179 | size_t slot_table_size; | ||
| 1180 | |||
| 1181 | dprintk("RPC: setting up udp-ipv4 transport...\n"); | ||
| 1182 | |||
| 1183 | xprt->max_reqs = xprt_udp_slot_table_entries; | ||
| 1184 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | ||
| 1185 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
| 1186 | if (xprt->slot == NULL) | ||
| 1187 | return -ENOMEM; | ||
| 1188 | memset(xprt->slot, 0, slot_table_size); | ||
| 1189 | |||
| 1190 | xprt->prot = IPPROTO_UDP; | ||
| 1191 | xprt->port = xprt_max_resvport; | ||
| 1192 | xprt->tsh_size = 0; | ||
| 1193 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
| 1194 | /* XXX: header size can vary due to auth type, IPv6, etc. */ | ||
| 1195 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | ||
| 1196 | |||
| 1197 | INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); | ||
| 1198 | xprt->bind_timeout = XS_BIND_TO; | ||
| 1199 | xprt->connect_timeout = XS_UDP_CONN_TO; | ||
| 1200 | xprt->reestablish_timeout = XS_UDP_REEST_TO; | ||
| 1201 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
| 1202 | |||
| 1203 | xprt->ops = &xs_udp_ops; | ||
| 1204 | |||
| 1205 | if (to) | ||
| 1206 | xprt->timeout = *to; | ||
| 1207 | else | ||
| 1208 | xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); | ||
| 1209 | |||
| 1210 | return 0; | ||
| 1211 | } | ||
| 1212 | |||
| 1213 | /** | ||
| 1214 | * xs_setup_tcp - Set up transport to use a TCP socket | ||
| 1215 | * @xprt: transport to set up | ||
| 1216 | * @to: timeout parameters | ||
| 1217 | * | ||
| 1218 | */ | ||
| 1219 | int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | ||
| 1220 | { | ||
| 1221 | size_t slot_table_size; | ||
| 1222 | |||
| 1223 | dprintk("RPC: setting up tcp-ipv4 transport...\n"); | ||
| 1224 | |||
| 1225 | xprt->max_reqs = xprt_tcp_slot_table_entries; | ||
| 1226 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | ||
| 1227 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
| 1228 | if (xprt->slot == NULL) | ||
| 1229 | return -ENOMEM; | ||
| 1230 | memset(xprt->slot, 0, slot_table_size); | ||
| 1231 | |||
| 1232 | xprt->prot = IPPROTO_TCP; | ||
| 1233 | xprt->port = xprt_max_resvport; | ||
| 1234 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); | ||
| 1235 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
| 1236 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; | ||
| 1237 | |||
| 1238 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); | ||
| 1239 | xprt->bind_timeout = XS_BIND_TO; | ||
| 1240 | xprt->connect_timeout = XS_TCP_CONN_TO; | ||
| 1241 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
| 1242 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
| 1243 | |||
| 1244 | xprt->ops = &xs_tcp_ops; | ||
| 1245 | |||
| 1246 | if (to) | ||
| 1247 | xprt->timeout = *to; | ||
| 1248 | else | ||
| 1249 | xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); | ||
| 1250 | |||
| 1251 | return 0; | ||
| 1252 | } | ||
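The creation path is expected to select one of these setup routines by protocol number (the in-tree caller lives in net/sunrpc/xprt.c). A reconstruction of that dispatch, sketched rather than copied verbatim:

	static int xs_setup(struct rpc_xprt *xprt, int proto,
			    struct rpc_timeout *to)
	{
		switch (proto) {
		case IPPROTO_UDP:
			return xs_setup_udp(xprt, to);
		case IPPROTO_TCP:
			return xs_setup_tcp(xprt, to);
		}
		return -EIO;	/* unsupported transport protocol */
	}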
