diff options
Diffstat (limited to 'net')
30 files changed, 2848 insertions, 1608 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 1c52fe809eda..9602ceb3bac9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -940,7 +940,7 @@ static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, | |||
940 | int noblock, int *errcode) | 940 | int noblock, int *errcode) |
941 | { | 941 | { |
942 | struct sk_buff *skb; | 942 | struct sk_buff *skb; |
943 | unsigned int gfp_mask; | 943 | gfp_t gfp_mask; |
944 | long timeo; | 944 | long timeo; |
945 | int err; | 945 | int err; |
946 | 946 | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c index 29250749f16f..d59f86f7ceab 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -495,7 +495,7 @@ void dccp_send_close(struct sock *sk, const int active) | |||
495 | { | 495 | { |
496 | struct dccp_sock *dp = dccp_sk(sk); | 496 | struct dccp_sock *dp = dccp_sk(sk); |
497 | struct sk_buff *skb; | 497 | struct sk_buff *skb; |
498 | const unsigned int prio = active ? GFP_KERNEL : GFP_ATOMIC; | 498 | const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC; |
499 | 499 | ||
500 | skb = alloc_skb(sk->sk_prot->max_header, prio); | 500 | skb = alloc_skb(sk->sk_prot->max_header, prio); |
501 | if (skb == NULL) | 501 | if (skb == NULL) |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 677419d0c9ad..3e98b57578dc 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -2239,6 +2239,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp, | |||
2239 | /* Note, it is the only place, where | 2239 | /* Note, it is the only place, where |
2240 | * fast path is recovered for sending TCP. | 2240 | * fast path is recovered for sending TCP. |
2241 | */ | 2241 | */ |
2242 | tp->pred_flags = 0; | ||
2242 | tcp_fast_path_check(sk, tp); | 2243 | tcp_fast_path_check(sk, tp); |
2243 | 2244 | ||
2244 | if (nwin > tp->max_window) { | 2245 | if (nwin > tp->max_window) { |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 678c3f2c0d0b..291df2e4c492 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -827,7 +827,7 @@ struct netlink_broadcast_data { | |||
827 | int failure; | 827 | int failure; |
828 | int congested; | 828 | int congested; |
829 | int delivered; | 829 | int delivered; |
830 | unsigned int allocation; | 830 | gfp_t allocation; |
831 | struct sk_buff *skb, *skb2; | 831 | struct sk_buff *skb, *skb2; |
832 | }; | 832 | }; |
833 | 833 | ||
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 46a2ce00a29b..cdcab9ca4c60 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile | |||
@@ -6,7 +6,7 @@ | |||
6 | obj-$(CONFIG_SUNRPC) += sunrpc.o | 6 | obj-$(CONFIG_SUNRPC) += sunrpc.o |
7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ | 7 | obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ |
8 | 8 | ||
9 | sunrpc-y := clnt.o xprt.o sched.o \ | 9 | sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ |
10 | auth.o auth_null.o auth_unix.o \ | 10 | auth.o auth_null.o auth_unix.o \ |
11 | svc.o svcsock.o svcauth.o svcauth_unix.o \ | 11 | svc.o svcsock.o svcauth.o svcauth_unix.o \ |
12 | pmap_clnt.o timer.o xdr.o \ | 12 | pmap_clnt.o timer.o xdr.o \ |
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 505e2d4b3d62..a415d99c394d 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
14 | #include <linux/socket.h> | ||
15 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
16 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
17 | 16 | ||
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index fe1b874084bc..f3431a7e33da 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile | |||
@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \ | |||
10 | obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o | 10 | obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o |
11 | 11 | ||
12 | rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ | 12 | rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \ |
13 | gss_krb5_seqnum.o | 13 | gss_krb5_seqnum.o gss_krb5_wrap.o |
14 | 14 | ||
15 | obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o | 15 | obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o |
16 | 16 | ||
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 2f7b867161d2..f44f46f1d8e0 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c | |||
@@ -42,9 +42,8 @@ | |||
42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/types.h> | 43 | #include <linux/types.h> |
44 | #include <linux/slab.h> | 44 | #include <linux/slab.h> |
45 | #include <linux/socket.h> | ||
46 | #include <linux/in.h> | ||
47 | #include <linux/sched.h> | 45 | #include <linux/sched.h> |
46 | #include <linux/pagemap.h> | ||
48 | #include <linux/sunrpc/clnt.h> | 47 | #include <linux/sunrpc/clnt.h> |
49 | #include <linux/sunrpc/auth.h> | 48 | #include <linux/sunrpc/auth.h> |
50 | #include <linux/sunrpc/auth_gss.h> | 49 | #include <linux/sunrpc/auth_gss.h> |
@@ -846,10 +845,8 @@ gss_marshal(struct rpc_task *task, u32 *p) | |||
846 | 845 | ||
847 | /* We compute the checksum for the verifier over the xdr-encoded bytes | 846 | /* We compute the checksum for the verifier over the xdr-encoded bytes |
848 | * starting with the xid and ending at the end of the credential: */ | 847 | * starting with the xid and ending at the end of the credential: */ |
849 | iov.iov_base = req->rq_snd_buf.head[0].iov_base; | 848 | iov.iov_base = xprt_skip_transport_header(task->tk_xprt, |
850 | if (task->tk_client->cl_xprt->stream) | 849 | req->rq_snd_buf.head[0].iov_base); |
851 | /* See clnt.c:call_header() */ | ||
852 | iov.iov_base += 4; | ||
853 | iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; | 850 | iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; |
854 | xdr_buf_from_iov(&iov, &verf_buf); | 851 | xdr_buf_from_iov(&iov, &verf_buf); |
855 | 852 | ||
@@ -857,9 +854,7 @@ gss_marshal(struct rpc_task *task, u32 *p) | |||
857 | *p++ = htonl(RPC_AUTH_GSS); | 854 | *p++ = htonl(RPC_AUTH_GSS); |
858 | 855 | ||
859 | mic.data = (u8 *)(p + 1); | 856 | mic.data = (u8 *)(p + 1); |
860 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, | 857 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); |
861 | GSS_C_QOP_DEFAULT, | ||
862 | &verf_buf, &mic); | ||
863 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) { | 858 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) { |
864 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | 859 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; |
865 | } else if (maj_stat != 0) { | 860 | } else if (maj_stat != 0) { |
@@ -890,10 +885,8 @@ static u32 * | |||
890 | gss_validate(struct rpc_task *task, u32 *p) | 885 | gss_validate(struct rpc_task *task, u32 *p) |
891 | { | 886 | { |
892 | struct rpc_cred *cred = task->tk_msg.rpc_cred; | 887 | struct rpc_cred *cred = task->tk_msg.rpc_cred; |
893 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, | ||
894 | gc_base); | ||
895 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); | 888 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); |
896 | u32 seq, qop_state; | 889 | u32 seq; |
897 | struct kvec iov; | 890 | struct kvec iov; |
898 | struct xdr_buf verf_buf; | 891 | struct xdr_buf verf_buf; |
899 | struct xdr_netobj mic; | 892 | struct xdr_netobj mic; |
@@ -914,23 +907,14 @@ gss_validate(struct rpc_task *task, u32 *p) | |||
914 | mic.data = (u8 *)p; | 907 | mic.data = (u8 *)p; |
915 | mic.len = len; | 908 | mic.len = len; |
916 | 909 | ||
917 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state); | 910 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); |
918 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | 911 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) |
919 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | 912 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; |
920 | if (maj_stat) | 913 | if (maj_stat) |
921 | goto out_bad; | 914 | goto out_bad; |
922 | switch (gss_cred->gc_service) { | 915 | /* We leave it to unwrap to calculate au_rslack. For now we just |
923 | case RPC_GSS_SVC_NONE: | 916 | * calculate the length of the verifier: */ |
924 | /* verifier data, flavor, length: */ | 917 | task->tk_auth->au_verfsize = XDR_QUADLEN(len) + 2; |
925 | task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; | ||
926 | break; | ||
927 | case RPC_GSS_SVC_INTEGRITY: | ||
928 | /* verifier data, flavor, length, length, sequence number: */ | ||
929 | task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; | ||
930 | break; | ||
931 | case RPC_GSS_SVC_PRIVACY: | ||
932 | goto out_bad; | ||
933 | } | ||
934 | gss_put_ctx(ctx); | 918 | gss_put_ctx(ctx); |
935 | dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", | 919 | dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", |
936 | task->tk_pid); | 920 | task->tk_pid); |
@@ -975,8 +959,7 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | |||
975 | p = iov->iov_base + iov->iov_len; | 959 | p = iov->iov_base + iov->iov_len; |
976 | mic.data = (u8 *)(p + 1); | 960 | mic.data = (u8 *)(p + 1); |
977 | 961 | ||
978 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, | 962 | maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); |
979 | GSS_C_QOP_DEFAULT, &integ_buf, &mic); | ||
980 | status = -EIO; /* XXX? */ | 963 | status = -EIO; /* XXX? */ |
981 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | 964 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) |
982 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | 965 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; |
@@ -990,6 +973,113 @@ gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | |||
990 | return 0; | 973 | return 0; |
991 | } | 974 | } |
992 | 975 | ||
976 | static void | ||
977 | priv_release_snd_buf(struct rpc_rqst *rqstp) | ||
978 | { | ||
979 | int i; | ||
980 | |||
981 | for (i=0; i < rqstp->rq_enc_pages_num; i++) | ||
982 | __free_page(rqstp->rq_enc_pages[i]); | ||
983 | kfree(rqstp->rq_enc_pages); | ||
984 | } | ||
985 | |||
986 | static int | ||
987 | alloc_enc_pages(struct rpc_rqst *rqstp) | ||
988 | { | ||
989 | struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; | ||
990 | int first, last, i; | ||
991 | |||
992 | if (snd_buf->page_len == 0) { | ||
993 | rqstp->rq_enc_pages_num = 0; | ||
994 | return 0; | ||
995 | } | ||
996 | |||
997 | first = snd_buf->page_base >> PAGE_CACHE_SHIFT; | ||
998 | last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_CACHE_SHIFT; | ||
999 | rqstp->rq_enc_pages_num = last - first + 1 + 1; | ||
1000 | rqstp->rq_enc_pages | ||
1001 | = kmalloc(rqstp->rq_enc_pages_num * sizeof(struct page *), | ||
1002 | GFP_NOFS); | ||
1003 | if (!rqstp->rq_enc_pages) | ||
1004 | goto out; | ||
1005 | for (i=0; i < rqstp->rq_enc_pages_num; i++) { | ||
1006 | rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS); | ||
1007 | if (rqstp->rq_enc_pages[i] == NULL) | ||
1008 | goto out_free; | ||
1009 | } | ||
1010 | rqstp->rq_release_snd_buf = priv_release_snd_buf; | ||
1011 | return 0; | ||
1012 | out_free: | ||
1013 | for (i--; i >= 0; i--) { | ||
1014 | __free_page(rqstp->rq_enc_pages[i]); | ||
1015 | } | ||
1016 | out: | ||
1017 | return -EAGAIN; | ||
1018 | } | ||
1019 | |||
1020 | static inline int | ||
1021 | gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | ||
1022 | kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj) | ||
1023 | { | ||
1024 | struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; | ||
1025 | u32 offset; | ||
1026 | u32 maj_stat; | ||
1027 | int status; | ||
1028 | u32 *opaque_len; | ||
1029 | struct page **inpages; | ||
1030 | int first; | ||
1031 | int pad; | ||
1032 | struct kvec *iov; | ||
1033 | char *tmp; | ||
1034 | |||
1035 | opaque_len = p++; | ||
1036 | offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; | ||
1037 | *p++ = htonl(rqstp->rq_seqno); | ||
1038 | |||
1039 | status = encode(rqstp, p, obj); | ||
1040 | if (status) | ||
1041 | return status; | ||
1042 | |||
1043 | status = alloc_enc_pages(rqstp); | ||
1044 | if (status) | ||
1045 | return status; | ||
1046 | first = snd_buf->page_base >> PAGE_CACHE_SHIFT; | ||
1047 | inpages = snd_buf->pages + first; | ||
1048 | snd_buf->pages = rqstp->rq_enc_pages; | ||
1049 | snd_buf->page_base -= first << PAGE_CACHE_SHIFT; | ||
1050 | /* Give the tail its own page, in case we need extra space in the | ||
1051 | * head when wrapping: */ | ||
1052 | if (snd_buf->page_len || snd_buf->tail[0].iov_len) { | ||
1053 | tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); | ||
1054 | memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); | ||
1055 | snd_buf->tail[0].iov_base = tmp; | ||
1056 | } | ||
1057 | maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); | ||
1058 | /* RPC_SLACK_SPACE should prevent this ever happening: */ | ||
1059 | BUG_ON(snd_buf->len > snd_buf->buflen); | ||
1060 | status = -EIO; | ||
1061 | /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was | ||
1062 | * done anyway, so it's safe to put the request on the wire: */ | ||
1063 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | ||
1064 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
1065 | else if (maj_stat) | ||
1066 | return status; | ||
1067 | |||
1068 | *opaque_len = htonl(snd_buf->len - offset); | ||
1069 | /* guess whether we're in the head or the tail: */ | ||
1070 | if (snd_buf->page_len || snd_buf->tail[0].iov_len) | ||
1071 | iov = snd_buf->tail; | ||
1072 | else | ||
1073 | iov = snd_buf->head; | ||
1074 | p = iov->iov_base + iov->iov_len; | ||
1075 | pad = 3 - ((snd_buf->len - offset - 1) & 3); | ||
1076 | memset(p, 0, pad); | ||
1077 | iov->iov_len += pad; | ||
1078 | snd_buf->len += pad; | ||
1079 | |||
1080 | return 0; | ||
1081 | } | ||
1082 | |||
993 | static int | 1083 | static int |
994 | gss_wrap_req(struct rpc_task *task, | 1084 | gss_wrap_req(struct rpc_task *task, |
995 | kxdrproc_t encode, void *rqstp, u32 *p, void *obj) | 1085 | kxdrproc_t encode, void *rqstp, u32 *p, void *obj) |
@@ -1017,6 +1107,8 @@ gss_wrap_req(struct rpc_task *task, | |||
1017 | rqstp, p, obj); | 1107 | rqstp, p, obj); |
1018 | break; | 1108 | break; |
1019 | case RPC_GSS_SVC_PRIVACY: | 1109 | case RPC_GSS_SVC_PRIVACY: |
1110 | status = gss_wrap_req_priv(cred, ctx, encode, | ||
1111 | rqstp, p, obj); | ||
1020 | break; | 1112 | break; |
1021 | } | 1113 | } |
1022 | out: | 1114 | out: |
@@ -1054,8 +1146,7 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | |||
1054 | if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) | 1146 | if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) |
1055 | return status; | 1147 | return status; |
1056 | 1148 | ||
1057 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, | 1149 | maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); |
1058 | &mic, NULL); | ||
1059 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | 1150 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) |
1060 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | 1151 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; |
1061 | if (maj_stat != GSS_S_COMPLETE) | 1152 | if (maj_stat != GSS_S_COMPLETE) |
@@ -1063,6 +1154,35 @@ gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | |||
1063 | return 0; | 1154 | return 0; |
1064 | } | 1155 | } |
1065 | 1156 | ||
1157 | static inline int | ||
1158 | gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, | ||
1159 | struct rpc_rqst *rqstp, u32 **p) | ||
1160 | { | ||
1161 | struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; | ||
1162 | u32 offset; | ||
1163 | u32 opaque_len; | ||
1164 | u32 maj_stat; | ||
1165 | int status = -EIO; | ||
1166 | |||
1167 | opaque_len = ntohl(*(*p)++); | ||
1168 | offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base; | ||
1169 | if (offset + opaque_len > rcv_buf->len) | ||
1170 | return status; | ||
1171 | /* remove padding: */ | ||
1172 | rcv_buf->len = offset + opaque_len; | ||
1173 | |||
1174 | maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); | ||
1175 | if (maj_stat == GSS_S_CONTEXT_EXPIRED) | ||
1176 | cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; | ||
1177 | if (maj_stat != GSS_S_COMPLETE) | ||
1178 | return status; | ||
1179 | if (ntohl(*(*p)++) != rqstp->rq_seqno) | ||
1180 | return status; | ||
1181 | |||
1182 | return 0; | ||
1183 | } | ||
1184 | |||
1185 | |||
1066 | static int | 1186 | static int |
1067 | gss_unwrap_resp(struct rpc_task *task, | 1187 | gss_unwrap_resp(struct rpc_task *task, |
1068 | kxdrproc_t decode, void *rqstp, u32 *p, void *obj) | 1188 | kxdrproc_t decode, void *rqstp, u32 *p, void *obj) |
@@ -1071,6 +1191,9 @@ gss_unwrap_resp(struct rpc_task *task, | |||
1071 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, | 1191 | struct gss_cred *gss_cred = container_of(cred, struct gss_cred, |
1072 | gc_base); | 1192 | gc_base); |
1073 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); | 1193 | struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); |
1194 | u32 *savedp = p; | ||
1195 | struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head; | ||
1196 | int savedlen = head->iov_len; | ||
1074 | int status = -EIO; | 1197 | int status = -EIO; |
1075 | 1198 | ||
1076 | if (ctx->gc_proc != RPC_GSS_PROC_DATA) | 1199 | if (ctx->gc_proc != RPC_GSS_PROC_DATA) |
@@ -1084,8 +1207,14 @@ gss_unwrap_resp(struct rpc_task *task, | |||
1084 | goto out; | 1207 | goto out; |
1085 | break; | 1208 | break; |
1086 | case RPC_GSS_SVC_PRIVACY: | 1209 | case RPC_GSS_SVC_PRIVACY: |
1210 | status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p); | ||
1211 | if (status) | ||
1212 | goto out; | ||
1087 | break; | 1213 | break; |
1088 | } | 1214 | } |
1215 | /* take into account extra slack for integrity and privacy cases: */ | ||
1216 | task->tk_auth->au_rslack = task->tk_auth->au_verfsize + (p - savedp) | ||
1217 | + (savedlen - head->iov_len); | ||
1089 | out_decode: | 1218 | out_decode: |
1090 | status = decode(rqstp, p, obj); | 1219 | status = decode(rqstp, p, obj); |
1091 | out: | 1220 | out: |
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index ee6ae74cd1b2..3f3d5437f02d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c | |||
@@ -139,17 +139,91 @@ buf_to_sg(struct scatterlist *sg, char *ptr, int len) { | |||
139 | sg->length = len; | 139 | sg->length = len; |
140 | } | 140 | } |
141 | 141 | ||
142 | static int | ||
143 | process_xdr_buf(struct xdr_buf *buf, int offset, int len, | ||
144 | int (*actor)(struct scatterlist *, void *), void *data) | ||
145 | { | ||
146 | int i, page_len, thislen, page_offset, ret = 0; | ||
147 | struct scatterlist sg[1]; | ||
148 | |||
149 | if (offset >= buf->head[0].iov_len) { | ||
150 | offset -= buf->head[0].iov_len; | ||
151 | } else { | ||
152 | thislen = buf->head[0].iov_len - offset; | ||
153 | if (thislen > len) | ||
154 | thislen = len; | ||
155 | buf_to_sg(sg, buf->head[0].iov_base + offset, thislen); | ||
156 | ret = actor(sg, data); | ||
157 | if (ret) | ||
158 | goto out; | ||
159 | offset = 0; | ||
160 | len -= thislen; | ||
161 | } | ||
162 | if (len == 0) | ||
163 | goto out; | ||
164 | |||
165 | if (offset >= buf->page_len) { | ||
166 | offset -= buf->page_len; | ||
167 | } else { | ||
168 | page_len = buf->page_len - offset; | ||
169 | if (page_len > len) | ||
170 | page_len = len; | ||
171 | len -= page_len; | ||
172 | page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1); | ||
173 | i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT; | ||
174 | thislen = PAGE_CACHE_SIZE - page_offset; | ||
175 | do { | ||
176 | if (thislen > page_len) | ||
177 | thislen = page_len; | ||
178 | sg->page = buf->pages[i]; | ||
179 | sg->offset = page_offset; | ||
180 | sg->length = thislen; | ||
181 | ret = actor(sg, data); | ||
182 | if (ret) | ||
183 | goto out; | ||
184 | page_len -= thislen; | ||
185 | i++; | ||
186 | page_offset = 0; | ||
187 | thislen = PAGE_CACHE_SIZE; | ||
188 | } while (page_len != 0); | ||
189 | offset = 0; | ||
190 | } | ||
191 | if (len == 0) | ||
192 | goto out; | ||
193 | |||
194 | if (offset < buf->tail[0].iov_len) { | ||
195 | thislen = buf->tail[0].iov_len - offset; | ||
196 | if (thislen > len) | ||
197 | thislen = len; | ||
198 | buf_to_sg(sg, buf->tail[0].iov_base + offset, thislen); | ||
199 | ret = actor(sg, data); | ||
200 | len -= thislen; | ||
201 | } | ||
202 | if (len != 0) | ||
203 | ret = -EINVAL; | ||
204 | out: | ||
205 | return ret; | ||
206 | } | ||
207 | |||
208 | static int | ||
209 | checksummer(struct scatterlist *sg, void *data) | ||
210 | { | ||
211 | struct crypto_tfm *tfm = (struct crypto_tfm *)data; | ||
212 | |||
213 | crypto_digest_update(tfm, sg, 1); | ||
214 | |||
215 | return 0; | ||
216 | } | ||
217 | |||
142 | /* checksum the plaintext data and hdrlen bytes of the token header */ | 218 | /* checksum the plaintext data and hdrlen bytes of the token header */ |
143 | s32 | 219 | s32 |
144 | make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, | 220 | make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, |
145 | struct xdr_netobj *cksum) | 221 | int body_offset, struct xdr_netobj *cksum) |
146 | { | 222 | { |
147 | char *cksumname; | 223 | char *cksumname; |
148 | struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ | 224 | struct crypto_tfm *tfm = NULL; /* XXX add to ctx? */ |
149 | struct scatterlist sg[1]; | 225 | struct scatterlist sg[1]; |
150 | u32 code = GSS_S_FAILURE; | 226 | u32 code = GSS_S_FAILURE; |
151 | int len, thislen, offset; | ||
152 | int i; | ||
153 | 227 | ||
154 | switch (cksumtype) { | 228 | switch (cksumtype) { |
155 | case CKSUMTYPE_RSA_MD5: | 229 | case CKSUMTYPE_RSA_MD5: |
@@ -169,33 +243,8 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body, | |||
169 | crypto_digest_init(tfm); | 243 | crypto_digest_init(tfm); |
170 | buf_to_sg(sg, header, hdrlen); | 244 | buf_to_sg(sg, header, hdrlen); |
171 | crypto_digest_update(tfm, sg, 1); | 245 | crypto_digest_update(tfm, sg, 1); |
172 | if (body->head[0].iov_len) { | 246 | process_xdr_buf(body, body_offset, body->len - body_offset, |
173 | buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); | 247 | checksummer, tfm); |
174 | crypto_digest_update(tfm, sg, 1); | ||
175 | } | ||
176 | |||
177 | len = body->page_len; | ||
178 | if (len != 0) { | ||
179 | offset = body->page_base & (PAGE_CACHE_SIZE - 1); | ||
180 | i = body->page_base >> PAGE_CACHE_SHIFT; | ||
181 | thislen = PAGE_CACHE_SIZE - offset; | ||
182 | do { | ||
183 | if (thislen > len) | ||
184 | thislen = len; | ||
185 | sg->page = body->pages[i]; | ||
186 | sg->offset = offset; | ||
187 | sg->length = thislen; | ||
188 | crypto_digest_update(tfm, sg, 1); | ||
189 | len -= thislen; | ||
190 | i++; | ||
191 | offset = 0; | ||
192 | thislen = PAGE_CACHE_SIZE; | ||
193 | } while(len != 0); | ||
194 | } | ||
195 | if (body->tail[0].iov_len) { | ||
196 | buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len); | ||
197 | crypto_digest_update(tfm, sg, 1); | ||
198 | } | ||
199 | crypto_digest_final(tfm, cksum->data); | 248 | crypto_digest_final(tfm, cksum->data); |
200 | code = 0; | 249 | code = 0; |
201 | out: | 250 | out: |
@@ -204,3 +253,154 @@ out: | |||
204 | } | 253 | } |
205 | 254 | ||
206 | EXPORT_SYMBOL(make_checksum); | 255 | EXPORT_SYMBOL(make_checksum); |
256 | |||
257 | struct encryptor_desc { | ||
258 | u8 iv[8]; /* XXX hard-coded blocksize */ | ||
259 | struct crypto_tfm *tfm; | ||
260 | int pos; | ||
261 | struct xdr_buf *outbuf; | ||
262 | struct page **pages; | ||
263 | struct scatterlist infrags[4]; | ||
264 | struct scatterlist outfrags[4]; | ||
265 | int fragno; | ||
266 | int fraglen; | ||
267 | }; | ||
268 | |||
269 | static int | ||
270 | encryptor(struct scatterlist *sg, void *data) | ||
271 | { | ||
272 | struct encryptor_desc *desc = data; | ||
273 | struct xdr_buf *outbuf = desc->outbuf; | ||
274 | struct page *in_page; | ||
275 | int thislen = desc->fraglen + sg->length; | ||
276 | int fraglen, ret; | ||
277 | int page_pos; | ||
278 | |||
279 | /* Worst case is 4 fragments: head, end of page 1, start | ||
280 | * of page 2, tail. Anything more is a bug. */ | ||
281 | BUG_ON(desc->fragno > 3); | ||
282 | desc->infrags[desc->fragno] = *sg; | ||
283 | desc->outfrags[desc->fragno] = *sg; | ||
284 | |||
285 | page_pos = desc->pos - outbuf->head[0].iov_len; | ||
286 | if (page_pos >= 0 && page_pos < outbuf->page_len) { | ||
287 | /* pages are not in place: */ | ||
288 | int i = (page_pos + outbuf->page_base) >> PAGE_CACHE_SHIFT; | ||
289 | in_page = desc->pages[i]; | ||
290 | } else { | ||
291 | in_page = sg->page; | ||
292 | } | ||
293 | desc->infrags[desc->fragno].page = in_page; | ||
294 | desc->fragno++; | ||
295 | desc->fraglen += sg->length; | ||
296 | desc->pos += sg->length; | ||
297 | |||
298 | fraglen = thislen & 7; /* XXX hardcoded blocksize */ | ||
299 | thislen -= fraglen; | ||
300 | |||
301 | if (thislen == 0) | ||
302 | return 0; | ||
303 | |||
304 | ret = crypto_cipher_encrypt_iv(desc->tfm, desc->outfrags, desc->infrags, | ||
305 | thislen, desc->iv); | ||
306 | if (ret) | ||
307 | return ret; | ||
308 | if (fraglen) { | ||
309 | desc->outfrags[0].page = sg->page; | ||
310 | desc->outfrags[0].offset = sg->offset + sg->length - fraglen; | ||
311 | desc->outfrags[0].length = fraglen; | ||
312 | desc->infrags[0] = desc->outfrags[0]; | ||
313 | desc->infrags[0].page = in_page; | ||
314 | desc->fragno = 1; | ||
315 | desc->fraglen = fraglen; | ||
316 | } else { | ||
317 | desc->fragno = 0; | ||
318 | desc->fraglen = 0; | ||
319 | } | ||
320 | return 0; | ||
321 | } | ||
322 | |||
323 | int | ||
324 | gss_encrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset, | ||
325 | struct page **pages) | ||
326 | { | ||
327 | int ret; | ||
328 | struct encryptor_desc desc; | ||
329 | |||
330 | BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); | ||
331 | |||
332 | memset(desc.iv, 0, sizeof(desc.iv)); | ||
333 | desc.tfm = tfm; | ||
334 | desc.pos = offset; | ||
335 | desc.outbuf = buf; | ||
336 | desc.pages = pages; | ||
337 | desc.fragno = 0; | ||
338 | desc.fraglen = 0; | ||
339 | |||
340 | ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc); | ||
341 | return ret; | ||
342 | } | ||
343 | |||
344 | EXPORT_SYMBOL(gss_encrypt_xdr_buf); | ||
345 | |||
346 | struct decryptor_desc { | ||
347 | u8 iv[8]; /* XXX hard-coded blocksize */ | ||
348 | struct crypto_tfm *tfm; | ||
349 | struct scatterlist frags[4]; | ||
350 | int fragno; | ||
351 | int fraglen; | ||
352 | }; | ||
353 | |||
354 | static int | ||
355 | decryptor(struct scatterlist *sg, void *data) | ||
356 | { | ||
357 | struct decryptor_desc *desc = data; | ||
358 | int thislen = desc->fraglen + sg->length; | ||
359 | int fraglen, ret; | ||
360 | |||
361 | /* Worst case is 4 fragments: head, end of page 1, start | ||
362 | * of page 2, tail. Anything more is a bug. */ | ||
363 | BUG_ON(desc->fragno > 3); | ||
364 | desc->frags[desc->fragno] = *sg; | ||
365 | desc->fragno++; | ||
366 | desc->fraglen += sg->length; | ||
367 | |||
368 | fraglen = thislen & 7; /* XXX hardcoded blocksize */ | ||
369 | thislen -= fraglen; | ||
370 | |||
371 | if (thislen == 0) | ||
372 | return 0; | ||
373 | |||
374 | ret = crypto_cipher_decrypt_iv(desc->tfm, desc->frags, desc->frags, | ||
375 | thislen, desc->iv); | ||
376 | if (ret) | ||
377 | return ret; | ||
378 | if (fraglen) { | ||
379 | desc->frags[0].page = sg->page; | ||
380 | desc->frags[0].offset = sg->offset + sg->length - fraglen; | ||
381 | desc->frags[0].length = fraglen; | ||
382 | desc->fragno = 1; | ||
383 | desc->fraglen = fraglen; | ||
384 | } else { | ||
385 | desc->fragno = 0; | ||
386 | desc->fraglen = 0; | ||
387 | } | ||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | int | ||
392 | gss_decrypt_xdr_buf(struct crypto_tfm *tfm, struct xdr_buf *buf, int offset) | ||
393 | { | ||
394 | struct decryptor_desc desc; | ||
395 | |||
396 | /* XXXJBF: */ | ||
397 | BUG_ON((buf->len - offset) % crypto_tfm_alg_blocksize(tfm) != 0); | ||
398 | |||
399 | memset(desc.iv, 0, sizeof(desc.iv)); | ||
400 | desc.tfm = tfm; | ||
401 | desc.fragno = 0; | ||
402 | desc.fraglen = 0; | ||
403 | return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc); | ||
404 | } | ||
405 | |||
406 | EXPORT_SYMBOL(gss_decrypt_xdr_buf); | ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 606a8a82cafb..5f1f806a0b11 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include <linux/types.h> | 39 | #include <linux/types.h> |
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/sunrpc/auth.h> | 41 | #include <linux/sunrpc/auth.h> |
42 | #include <linux/in.h> | ||
43 | #include <linux/sunrpc/gss_krb5.h> | 42 | #include <linux/sunrpc/gss_krb5.h> |
44 | #include <linux/sunrpc/xdr.h> | 43 | #include <linux/sunrpc/xdr.h> |
45 | #include <linux/crypto.h> | 44 | #include <linux/crypto.h> |
@@ -191,43 +190,12 @@ gss_delete_sec_context_kerberos(void *internal_ctx) { | |||
191 | kfree(kctx); | 190 | kfree(kctx); |
192 | } | 191 | } |
193 | 192 | ||
194 | static u32 | ||
195 | gss_verify_mic_kerberos(struct gss_ctx *ctx, | ||
196 | struct xdr_buf *message, | ||
197 | struct xdr_netobj *mic_token, | ||
198 | u32 *qstate) { | ||
199 | u32 maj_stat = 0; | ||
200 | int qop_state; | ||
201 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
202 | |||
203 | maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, | ||
204 | KG_TOK_MIC_MSG); | ||
205 | if (!maj_stat && qop_state) | ||
206 | *qstate = qop_state; | ||
207 | |||
208 | dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); | ||
209 | return maj_stat; | ||
210 | } | ||
211 | |||
212 | static u32 | ||
213 | gss_get_mic_kerberos(struct gss_ctx *ctx, | ||
214 | u32 qop, | ||
215 | struct xdr_buf *message, | ||
216 | struct xdr_netobj *mic_token) { | ||
217 | u32 err = 0; | ||
218 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
219 | |||
220 | err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); | ||
221 | |||
222 | dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); | ||
223 | |||
224 | return err; | ||
225 | } | ||
226 | |||
227 | static struct gss_api_ops gss_kerberos_ops = { | 193 | static struct gss_api_ops gss_kerberos_ops = { |
228 | .gss_import_sec_context = gss_import_sec_context_kerberos, | 194 | .gss_import_sec_context = gss_import_sec_context_kerberos, |
229 | .gss_get_mic = gss_get_mic_kerberos, | 195 | .gss_get_mic = gss_get_mic_kerberos, |
230 | .gss_verify_mic = gss_verify_mic_kerberos, | 196 | .gss_verify_mic = gss_verify_mic_kerberos, |
197 | .gss_wrap = gss_wrap_kerberos, | ||
198 | .gss_unwrap = gss_unwrap_kerberos, | ||
231 | .gss_delete_sec_context = gss_delete_sec_context_kerberos, | 199 | .gss_delete_sec_context = gss_delete_sec_context_kerberos, |
232 | }; | 200 | }; |
233 | 201 | ||
@@ -242,6 +210,11 @@ static struct pf_desc gss_kerberos_pfs[] = { | |||
242 | .service = RPC_GSS_SVC_INTEGRITY, | 210 | .service = RPC_GSS_SVC_INTEGRITY, |
243 | .name = "krb5i", | 211 | .name = "krb5i", |
244 | }, | 212 | }, |
213 | [2] = { | ||
214 | .pseudoflavor = RPC_AUTH_GSS_KRB5P, | ||
215 | .service = RPC_GSS_SVC_PRIVACY, | ||
216 | .name = "krb5p", | ||
217 | }, | ||
245 | }; | 218 | }; |
246 | 219 | ||
247 | static struct gss_api_mech gss_kerberos_mech = { | 220 | static struct gss_api_mech gss_kerberos_mech = { |
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index afeeb8715a77..13f8ae979454 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c | |||
@@ -70,22 +70,13 @@ | |||
70 | # define RPCDBG_FACILITY RPCDBG_AUTH | 70 | # define RPCDBG_FACILITY RPCDBG_AUTH |
71 | #endif | 71 | #endif |
72 | 72 | ||
73 | static inline int | ||
74 | gss_krb5_padding(int blocksize, int length) { | ||
75 | /* Most of the code is block-size independent but in practice we | ||
76 | * use only 8: */ | ||
77 | BUG_ON(blocksize != 8); | ||
78 | return 8 - (length & 7); | ||
79 | } | ||
80 | |||
81 | u32 | 73 | u32 |
82 | krb5_make_token(struct krb5_ctx *ctx, int qop_req, | 74 | gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text, |
83 | struct xdr_buf *text, struct xdr_netobj *token, | 75 | struct xdr_netobj *token) |
84 | int toktype) | ||
85 | { | 76 | { |
77 | struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; | ||
86 | s32 checksum_type; | 78 | s32 checksum_type; |
87 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | 79 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; |
88 | int blocksize = 0, tmsglen; | ||
89 | unsigned char *ptr, *krb5_hdr, *msg_start; | 80 | unsigned char *ptr, *krb5_hdr, *msg_start; |
90 | s32 now; | 81 | s32 now; |
91 | 82 | ||
@@ -93,9 +84,6 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, | |||
93 | 84 | ||
94 | now = get_seconds(); | 85 | now = get_seconds(); |
95 | 86 | ||
96 | if (qop_req != 0) | ||
97 | goto out_err; | ||
98 | |||
99 | switch (ctx->signalg) { | 87 | switch (ctx->signalg) { |
100 | case SGN_ALG_DES_MAC_MD5: | 88 | case SGN_ALG_DES_MAC_MD5: |
101 | checksum_type = CKSUMTYPE_RSA_MD5; | 89 | checksum_type = CKSUMTYPE_RSA_MD5; |
@@ -111,21 +99,13 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, | |||
111 | goto out_err; | 99 | goto out_err; |
112 | } | 100 | } |
113 | 101 | ||
114 | if (toktype == KG_TOK_WRAP_MSG) { | 102 | token->len = g_token_size(&ctx->mech_used, 22); |
115 | blocksize = crypto_tfm_alg_blocksize(ctx->enc); | ||
116 | tmsglen = blocksize + text->len | ||
117 | + gss_krb5_padding(blocksize, blocksize + text->len); | ||
118 | } else { | ||
119 | tmsglen = 0; | ||
120 | } | ||
121 | |||
122 | token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); | ||
123 | 103 | ||
124 | ptr = token->data; | 104 | ptr = token->data; |
125 | g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr); | 105 | g_make_token_header(&ctx->mech_used, 22, &ptr); |
126 | 106 | ||
127 | *ptr++ = (unsigned char) ((toktype>>8)&0xff); | 107 | *ptr++ = (unsigned char) ((KG_TOK_MIC_MSG>>8)&0xff); |
128 | *ptr++ = (unsigned char) (toktype&0xff); | 108 | *ptr++ = (unsigned char) (KG_TOK_MIC_MSG&0xff); |
129 | 109 | ||
130 | /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ | 110 | /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ |
131 | krb5_hdr = ptr - 2; | 111 | krb5_hdr = ptr - 2; |
@@ -133,17 +113,9 @@ krb5_make_token(struct krb5_ctx *ctx, int qop_req, | |||
133 | 113 | ||
134 | *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); | 114 | *(u16 *)(krb5_hdr + 2) = htons(ctx->signalg); |
135 | memset(krb5_hdr + 4, 0xff, 4); | 115 | memset(krb5_hdr + 4, 0xff, 4); |
136 | if (toktype == KG_TOK_WRAP_MSG) | ||
137 | *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); | ||
138 | 116 | ||
139 | if (toktype == KG_TOK_WRAP_MSG) { | 117 | if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum)) |
140 | /* XXX removing support for now */ | ||
141 | goto out_err; | ||
142 | } else { /* Sign only. */ | ||
143 | if (make_checksum(checksum_type, krb5_hdr, 8, text, | ||
144 | &md5cksum)) | ||
145 | goto out_err; | 118 | goto out_err; |
146 | } | ||
147 | 119 | ||
148 | switch (ctx->signalg) { | 120 | switch (ctx->signalg) { |
149 | case SGN_ALG_DES_MAC_MD5: | 121 | case SGN_ALG_DES_MAC_MD5: |
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c index 8767fc53183d..2030475d98ed 100644 --- a/net/sunrpc/auth_gss/gss_krb5_unseal.c +++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c | |||
@@ -68,21 +68,14 @@ | |||
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | 70 | ||
71 | /* message_buffer is an input if toktype is MIC and an output if it is WRAP: | 71 | /* read_token is a mic token, and message_buffer is the data that the mic was |
72 | * If toktype is MIC: read_token is a mic token, and message_buffer is the | 72 | * supposedly taken over. */ |
73 | * data that the mic was supposedly taken over. | ||
74 | * If toktype is WRAP: read_token is a wrap token, and message_buffer is used | ||
75 | * to return the decrypted data. | ||
76 | */ | ||
77 | 73 | ||
78 | /* XXX will need to change prototype and/or just split into a separate function | ||
79 | * when we add privacy (because read_token will be in pages too). */ | ||
80 | u32 | 74 | u32 |
81 | krb5_read_token(struct krb5_ctx *ctx, | 75 | gss_verify_mic_kerberos(struct gss_ctx *gss_ctx, |
82 | struct xdr_netobj *read_token, | 76 | struct xdr_buf *message_buffer, struct xdr_netobj *read_token) |
83 | struct xdr_buf *message_buffer, | ||
84 | int *qop_state, int toktype) | ||
85 | { | 77 | { |
78 | struct krb5_ctx *ctx = gss_ctx->internal_ctx_id; | ||
86 | int signalg; | 79 | int signalg; |
87 | int sealalg; | 80 | int sealalg; |
88 | s32 checksum_type; | 81 | s32 checksum_type; |
@@ -100,16 +93,12 @@ krb5_read_token(struct krb5_ctx *ctx, | |||
100 | read_token->len)) | 93 | read_token->len)) |
101 | goto out; | 94 | goto out; |
102 | 95 | ||
103 | if ((*ptr++ != ((toktype>>8)&0xff)) || (*ptr++ != (toktype&0xff))) | 96 | if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) || |
97 | (*ptr++ != ( KG_TOK_MIC_MSG &0xff)) ) | ||
104 | goto out; | 98 | goto out; |
105 | 99 | ||
106 | /* XXX sanity-check bodysize?? */ | 100 | /* XXX sanity-check bodysize?? */ |
107 | 101 | ||
108 | if (toktype == KG_TOK_WRAP_MSG) { | ||
109 | /* XXX gone */ | ||
110 | goto out; | ||
111 | } | ||
112 | |||
113 | /* get the sign and seal algorithms */ | 102 | /* get the sign and seal algorithms */ |
114 | 103 | ||
115 | signalg = ptr[0] + (ptr[1] << 8); | 104 | signalg = ptr[0] + (ptr[1] << 8); |
@@ -120,14 +109,7 @@ krb5_read_token(struct krb5_ctx *ctx, | |||
120 | if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) | 109 | if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) |
121 | goto out; | 110 | goto out; |
122 | 111 | ||
123 | if (((toktype != KG_TOK_WRAP_MSG) && (sealalg != 0xffff)) || | 112 | if (sealalg != 0xffff) |
124 | ((toktype == KG_TOK_WRAP_MSG) && (sealalg == 0xffff))) | ||
125 | goto out; | ||
126 | |||
127 | /* in the current spec, there is only one valid seal algorithm per | ||
128 | key type, so a simple comparison is ok */ | ||
129 | |||
130 | if ((toktype == KG_TOK_WRAP_MSG) && !(sealalg == ctx->sealalg)) | ||
131 | goto out; | 113 | goto out; |
132 | 114 | ||
133 | /* there are several mappings of seal algorithms to sign algorithms, | 115 | /* there are several mappings of seal algorithms to sign algorithms, |
@@ -154,7 +136,7 @@ krb5_read_token(struct krb5_ctx *ctx, | |||
154 | switch (signalg) { | 136 | switch (signalg) { |
155 | case SGN_ALG_DES_MAC_MD5: | 137 | case SGN_ALG_DES_MAC_MD5: |
156 | ret = make_checksum(checksum_type, ptr - 2, 8, | 138 | ret = make_checksum(checksum_type, ptr - 2, 8, |
157 | message_buffer, &md5cksum); | 139 | message_buffer, 0, &md5cksum); |
158 | if (ret) | 140 | if (ret) |
159 | goto out; | 141 | goto out; |
160 | 142 | ||
@@ -175,9 +157,6 @@ krb5_read_token(struct krb5_ctx *ctx, | |||
175 | 157 | ||
176 | /* it got through unscathed. Make sure the context is unexpired */ | 158 | /* it got through unscathed. Make sure the context is unexpired */ |
177 | 159 | ||
178 | if (qop_state) | ||
179 | *qop_state = GSS_C_QOP_DEFAULT; | ||
180 | |||
181 | now = get_seconds(); | 160 | now = get_seconds(); |
182 | 161 | ||
183 | ret = GSS_S_CONTEXT_EXPIRED; | 162 | ret = GSS_S_CONTEXT_EXPIRED; |
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c new file mode 100644 index 000000000000..af777cf9f251 --- /dev/null +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c | |||
@@ -0,0 +1,363 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include <linux/slab.h> | ||
3 | #include <linux/jiffies.h> | ||
4 | #include <linux/sunrpc/gss_krb5.h> | ||
5 | #include <linux/random.h> | ||
6 | #include <linux/pagemap.h> | ||
7 | #include <asm/scatterlist.h> | ||
8 | #include <linux/crypto.h> | ||
9 | |||
10 | #ifdef RPC_DEBUG | ||
11 | # define RPCDBG_FACILITY RPCDBG_AUTH | ||
12 | #endif | ||
13 | |||
14 | static inline int | ||
15 | gss_krb5_padding(int blocksize, int length) | ||
16 | { | ||
17 | /* Most of the code is block-size independent but currently we | ||
18 | * use only 8: */ | ||
19 | BUG_ON(blocksize != 8); | ||
20 | return 8 - (length & 7); | ||
21 | } | ||
22 | |||
23 | static inline void | ||
24 | gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize) | ||
25 | { | ||
26 | int padding = gss_krb5_padding(blocksize, buf->len - offset); | ||
27 | char *p; | ||
28 | struct kvec *iov; | ||
29 | |||
30 | if (buf->page_len || buf->tail[0].iov_len) | ||
31 | iov = &buf->tail[0]; | ||
32 | else | ||
33 | iov = &buf->head[0]; | ||
34 | p = iov->iov_base + iov->iov_len; | ||
35 | iov->iov_len += padding; | ||
36 | buf->len += padding; | ||
37 | memset(p, padding, padding); | ||
38 | } | ||
39 | |||
40 | static inline int | ||
41 | gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) | ||
42 | { | ||
43 | u8 *ptr; | ||
44 | u8 pad; | ||
45 | int len = buf->len; | ||
46 | |||
47 | if (len <= buf->head[0].iov_len) { | ||
48 | pad = *(u8 *)(buf->head[0].iov_base + len - 1); | ||
49 | if (pad > buf->head[0].iov_len) | ||
50 | return -EINVAL; | ||
51 | buf->head[0].iov_len -= pad; | ||
52 | goto out; | ||
53 | } else | ||
54 | len -= buf->head[0].iov_len; | ||
55 | if (len <= buf->page_len) { | ||
56 | int last = (buf->page_base + len - 1) | ||
57 | >>PAGE_CACHE_SHIFT; | ||
58 | int offset = (buf->page_base + len - 1) | ||
59 | & (PAGE_CACHE_SIZE - 1); | ||
60 | ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA); | ||
61 | pad = *(ptr + offset); | ||
62 | kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA); | ||
63 | goto out; | ||
64 | } else | ||
65 | len -= buf->page_len; | ||
66 | BUG_ON(len > buf->tail[0].iov_len); | ||
67 | pad = *(u8 *)(buf->tail[0].iov_base + len - 1); | ||
68 | out: | ||
69 | /* XXX: NOTE: we do not adjust the page lengths--they represent | ||
70 | * a range of data in the real filesystem page cache, and we need | ||
71 | * to know that range so the xdr code can properly place read data. | ||
72 | * However adjusting the head length, as we do above, is harmless. | ||
73 | * In the case of a request that fits into a single page, the server | ||
74 | * also uses length and head length together to determine the original | ||
75 | * start of the request to copy the request for deferal; so it's | ||
76 | * easier on the server if we adjust head and tail length in tandem. | ||
77 | * It's not really a problem that we don't fool with the page and | ||
78 | * tail lengths, though--at worst badly formed xdr might lead the | ||
79 | * server to attempt to parse the padding. | ||
80 | * XXX: Document all these weird requirements for gss mechanism | ||
81 | * wrap/unwrap functions. */ | ||
82 | if (pad > blocksize) | ||
83 | return -EINVAL; | ||
84 | if (buf->len > pad) | ||
85 | buf->len -= pad; | ||
86 | else | ||
87 | return -EINVAL; | ||
88 | return 0; | ||
89 | } | ||
90 | |||
91 | static inline void | ||
92 | make_confounder(char *p, int blocksize) | ||
93 | { | ||
94 | static u64 i = 0; | ||
95 | u64 *q = (u64 *)p; | ||
96 | |||
97 | /* rfc1964 claims this should be "random". But all that's really | ||
98 | * necessary is that it be unique. And not even that is necessary in | ||
99 | * our case since our "gssapi" implementation exists only to support | ||
100 | * rpcsec_gss, so we know that the only buffers we will ever encrypt | ||
101 | * already begin with a unique sequence number. Just to hedge my bets | ||
102 | * I'll make a half-hearted attempt at something unique, but ensuring | ||
103 | * uniqueness would mean worrying about atomicity and rollover, and I | ||
104 | * don't care enough. */ | ||
105 | |||
106 | BUG_ON(blocksize != 8); | ||
107 | *q = i++; | ||
108 | } | ||
109 | |||
110 | /* Assumptions: the head and tail of inbuf are ours to play with. | ||
111 | * The pages, however, may be real pages in the page cache and we replace | ||
112 | * them with scratch pages from **pages before writing to them. */ | ||
113 | /* XXX: obviously the above should be documentation of wrap interface, | ||
114 | * and shouldn't be in this kerberos-specific file. */ | ||
115 | |||
116 | /* XXX factor out common code with seal/unseal. */ | ||
117 | |||
118 | u32 | ||
119 | gss_wrap_kerberos(struct gss_ctx *ctx, int offset, | ||
120 | struct xdr_buf *buf, struct page **pages) | ||
121 | { | ||
122 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
123 | s32 checksum_type; | ||
124 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | ||
125 | int blocksize = 0, plainlen; | ||
126 | unsigned char *ptr, *krb5_hdr, *msg_start; | ||
127 | s32 now; | ||
128 | int headlen; | ||
129 | struct page **tmp_pages; | ||
130 | |||
131 | dprintk("RPC: gss_wrap_kerberos\n"); | ||
132 | |||
133 | now = get_seconds(); | ||
134 | |||
135 | switch (kctx->signalg) { | ||
136 | case SGN_ALG_DES_MAC_MD5: | ||
137 | checksum_type = CKSUMTYPE_RSA_MD5; | ||
138 | break; | ||
139 | default: | ||
140 | dprintk("RPC: gss_krb5_seal: kctx->signalg %d not" | ||
141 | " supported\n", kctx->signalg); | ||
142 | goto out_err; | ||
143 | } | ||
144 | if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) { | ||
145 | dprintk("RPC: gss_krb5_seal: kctx->sealalg %d not supported\n", | ||
146 | kctx->sealalg); | ||
147 | goto out_err; | ||
148 | } | ||
149 | |||
150 | blocksize = crypto_tfm_alg_blocksize(kctx->enc); | ||
151 | gss_krb5_add_padding(buf, offset, blocksize); | ||
152 | BUG_ON((buf->len - offset) % blocksize); | ||
153 | plainlen = blocksize + buf->len - offset; | ||
154 | |||
155 | headlen = g_token_size(&kctx->mech_used, 22 + plainlen) - | ||
156 | (buf->len - offset); | ||
157 | |||
158 | ptr = buf->head[0].iov_base + offset; | ||
159 | /* shift data to make room for header. */ | ||
160 | /* XXX Would be cleverer to encrypt while copying. */ | ||
161 | /* XXX bounds checking, slack, etc. */ | ||
162 | memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset); | ||
163 | buf->head[0].iov_len += headlen; | ||
164 | buf->len += headlen; | ||
165 | BUG_ON((buf->len - offset - headlen) % blocksize); | ||
166 | |||
167 | g_make_token_header(&kctx->mech_used, 22 + plainlen, &ptr); | ||
168 | |||
169 | |||
170 | *ptr++ = (unsigned char) ((KG_TOK_WRAP_MSG>>8)&0xff); | ||
171 | *ptr++ = (unsigned char) (KG_TOK_WRAP_MSG&0xff); | ||
172 | |||
173 | /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */ | ||
174 | krb5_hdr = ptr - 2; | ||
175 | msg_start = krb5_hdr + 24; | ||
176 | /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize); | ||
177 | |||
178 | *(u16 *)(krb5_hdr + 2) = htons(kctx->signalg); | ||
179 | memset(krb5_hdr + 4, 0xff, 4); | ||
180 | *(u16 *)(krb5_hdr + 4) = htons(kctx->sealalg); | ||
181 | |||
182 | make_confounder(msg_start, blocksize); | ||
183 | |||
184 | /* XXXJBF: UGH!: */ | ||
185 | tmp_pages = buf->pages; | ||
186 | buf->pages = pages; | ||
187 | if (make_checksum(checksum_type, krb5_hdr, 8, buf, | ||
188 | offset + headlen - blocksize, &md5cksum)) | ||
189 | goto out_err; | ||
190 | buf->pages = tmp_pages; | ||
191 | |||
192 | switch (kctx->signalg) { | ||
193 | case SGN_ALG_DES_MAC_MD5: | ||
194 | if (krb5_encrypt(kctx->seq, NULL, md5cksum.data, | ||
195 | md5cksum.data, md5cksum.len)) | ||
196 | goto out_err; | ||
197 | memcpy(krb5_hdr + 16, | ||
198 | md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, | ||
199 | KRB5_CKSUM_LENGTH); | ||
200 | |||
201 | dprintk("RPC: make_seal_token: cksum data: \n"); | ||
202 | print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); | ||
203 | break; | ||
204 | default: | ||
205 | BUG(); | ||
206 | } | ||
207 | |||
208 | kfree(md5cksum.data); | ||
209 | |||
210 | /* XXX would probably be more efficient to compute checksum | ||
211 | * and encrypt at the same time: */ | ||
212 | if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff, | ||
213 | kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))) | ||
214 | goto out_err; | ||
215 | |||
216 | if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize, | ||
217 | pages)) | ||
218 | goto out_err; | ||
219 | |||
220 | kctx->seq_send++; | ||
221 | |||
222 | return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); | ||
223 | out_err: | ||
224 | if (md5cksum.data) kfree(md5cksum.data); | ||
225 | return GSS_S_FAILURE; | ||
226 | } | ||
227 | |||
228 | u32 | ||
229 | gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf) | ||
230 | { | ||
231 | struct krb5_ctx *kctx = ctx->internal_ctx_id; | ||
232 | int signalg; | ||
233 | int sealalg; | ||
234 | s32 checksum_type; | ||
235 | struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; | ||
236 | s32 now; | ||
237 | int direction; | ||
238 | s32 seqnum; | ||
239 | unsigned char *ptr; | ||
240 | int bodysize; | ||
241 | u32 ret = GSS_S_DEFECTIVE_TOKEN; | ||
242 | void *data_start, *orig_start; | ||
243 | int data_len; | ||
244 | int blocksize; | ||
245 | |||
246 | dprintk("RPC: gss_unwrap_kerberos\n"); | ||
247 | |||
248 | ptr = (u8 *)buf->head[0].iov_base + offset; | ||
249 | if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, | ||
250 | buf->len - offset)) | ||
251 | goto out; | ||
252 | |||
253 | if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) || | ||
254 | (*ptr++ != (KG_TOK_WRAP_MSG &0xff)) ) | ||
255 | goto out; | ||
256 | |||
257 | /* XXX sanity-check bodysize?? */ | ||
258 | |||
259 | /* get the sign and seal algorithms */ | ||
260 | |||
261 | signalg = ptr[0] + (ptr[1] << 8); | ||
262 | sealalg = ptr[2] + (ptr[3] << 8); | ||
263 | |||
264 | /* Sanity checks */ | ||
265 | |||
266 | if ((ptr[4] != 0xff) || (ptr[5] != 0xff)) | ||
267 | goto out; | ||
268 | |||
269 | if (sealalg == 0xffff) | ||
270 | goto out; | ||
271 | |||
272 | /* in the current spec, there is only one valid seal algorithm per | ||
273 | key type, so a simple comparison is ok */ | ||
274 | |||
275 | if (sealalg != kctx->sealalg) | ||
276 | goto out; | ||
277 | |||
278 | /* there are several mappings of seal algorithms to sign algorithms, | ||
279 | but few enough that we can try them all. */ | ||
280 | |||
281 | if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) || | ||
282 | (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) || | ||
283 | (kctx->sealalg == SEAL_ALG_DES3KD && | ||
284 | signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) | ||
285 | goto out; | ||
286 | |||
287 | if (gss_decrypt_xdr_buf(kctx->enc, buf, | ||
288 | ptr + 22 - (unsigned char *)buf->head[0].iov_base)) | ||
289 | goto out; | ||
290 | |||
291 | /* compute the checksum of the message */ | ||
292 | |||
293 | /* initialize the the cksum */ | ||
294 | switch (signalg) { | ||
295 | case SGN_ALG_DES_MAC_MD5: | ||
296 | checksum_type = CKSUMTYPE_RSA_MD5; | ||
297 | break; | ||
298 | default: | ||
299 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
300 | goto out; | ||
301 | } | ||
302 | |||
303 | switch (signalg) { | ||
304 | case SGN_ALG_DES_MAC_MD5: | ||
305 | ret = make_checksum(checksum_type, ptr - 2, 8, buf, | ||
306 | ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum); | ||
307 | if (ret) | ||
308 | goto out; | ||
309 | |||
310 | ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data, | ||
311 | md5cksum.data, md5cksum.len); | ||
312 | if (ret) | ||
313 | goto out; | ||
314 | |||
315 | if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { | ||
316 | ret = GSS_S_BAD_SIG; | ||
317 | goto out; | ||
318 | } | ||
319 | break; | ||
320 | default: | ||
321 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
322 | goto out; | ||
323 | } | ||
324 | |||
325 | /* it got through unscathed. Make sure the context is unexpired */ | ||
326 | |||
327 | now = get_seconds(); | ||
328 | |||
329 | ret = GSS_S_CONTEXT_EXPIRED; | ||
330 | if (now > kctx->endtime) | ||
331 | goto out; | ||
332 | |||
333 | /* do sequencing checks */ | ||
334 | |||
335 | ret = GSS_S_BAD_SIG; | ||
336 | if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction, | ||
337 | &seqnum))) | ||
338 | goto out; | ||
339 | |||
340 | if ((kctx->initiate && direction != 0xff) || | ||
341 | (!kctx->initiate && direction != 0)) | ||
342 | goto out; | ||
343 | |||
344 | /* Copy the data back to the right position. XXX: Would probably be | ||
345 | * better to copy and encrypt at the same time. */ | ||
346 | |||
347 | blocksize = crypto_tfm_alg_blocksize(kctx->enc); | ||
348 | data_start = ptr + 22 + blocksize; | ||
349 | orig_start = buf->head[0].iov_base + offset; | ||
350 | data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; | ||
351 | memmove(orig_start, data_start, data_len); | ||
352 | buf->head[0].iov_len -= (data_start - orig_start); | ||
353 | buf->len -= (data_start - orig_start); | ||
354 | |||
355 | ret = GSS_S_DEFECTIVE_TOKEN; | ||
356 | if (gss_krb5_remove_padding(buf, blocksize)) | ||
357 | goto out; | ||
358 | |||
359 | ret = GSS_S_COMPLETE; | ||
360 | out: | ||
361 | if (md5cksum.data) kfree(md5cksum.data); | ||
362 | return ret; | ||
363 | } | ||
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 9dfb68377d69..b048bf672da2 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c | |||
@@ -35,7 +35,6 @@ | |||
35 | 35 | ||
36 | #include <linux/types.h> | 36 | #include <linux/types.h> |
37 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
38 | #include <linux/socket.h> | ||
39 | #include <linux/module.h> | 38 | #include <linux/module.h> |
40 | #include <linux/sunrpc/msg_prot.h> | 39 | #include <linux/sunrpc/msg_prot.h> |
41 | #include <linux/sunrpc/gss_asn1.h> | 40 | #include <linux/sunrpc/gss_asn1.h> |
@@ -251,13 +250,11 @@ gss_import_sec_context(const void *input_token, size_t bufsize, | |||
251 | 250 | ||
252 | u32 | 251 | u32 |
253 | gss_get_mic(struct gss_ctx *context_handle, | 252 | gss_get_mic(struct gss_ctx *context_handle, |
254 | u32 qop, | ||
255 | struct xdr_buf *message, | 253 | struct xdr_buf *message, |
256 | struct xdr_netobj *mic_token) | 254 | struct xdr_netobj *mic_token) |
257 | { | 255 | { |
258 | return context_handle->mech_type->gm_ops | 256 | return context_handle->mech_type->gm_ops |
259 | ->gss_get_mic(context_handle, | 257 | ->gss_get_mic(context_handle, |
260 | qop, | ||
261 | message, | 258 | message, |
262 | mic_token); | 259 | mic_token); |
263 | } | 260 | } |
@@ -267,16 +264,34 @@ gss_get_mic(struct gss_ctx *context_handle, | |||
267 | u32 | 264 | u32 |
268 | gss_verify_mic(struct gss_ctx *context_handle, | 265 | gss_verify_mic(struct gss_ctx *context_handle, |
269 | struct xdr_buf *message, | 266 | struct xdr_buf *message, |
270 | struct xdr_netobj *mic_token, | 267 | struct xdr_netobj *mic_token) |
271 | u32 *qstate) | ||
272 | { | 268 | { |
273 | return context_handle->mech_type->gm_ops | 269 | return context_handle->mech_type->gm_ops |
274 | ->gss_verify_mic(context_handle, | 270 | ->gss_verify_mic(context_handle, |
275 | message, | 271 | message, |
276 | mic_token, | 272 | mic_token); |
277 | qstate); | ||
278 | } | 273 | } |
279 | 274 | ||
275 | u32 | ||
276 | gss_wrap(struct gss_ctx *ctx_id, | ||
277 | int offset, | ||
278 | struct xdr_buf *buf, | ||
279 | struct page **inpages) | ||
280 | { | ||
281 | return ctx_id->mech_type->gm_ops | ||
282 | ->gss_wrap(ctx_id, offset, buf, inpages); | ||
283 | } | ||
284 | |||
285 | u32 | ||
286 | gss_unwrap(struct gss_ctx *ctx_id, | ||
287 | int offset, | ||
288 | struct xdr_buf *buf) | ||
289 | { | ||
290 | return ctx_id->mech_type->gm_ops | ||
291 | ->gss_unwrap(ctx_id, offset, buf); | ||
292 | } | ||
293 | |||
294 | |||
280 | /* gss_delete_sec_context: free all resources associated with context_handle. | 295 | /* gss_delete_sec_context: free all resources associated with context_handle. |
281 | * Note this differs from the RFC 2744-specified prototype in that we don't | 296 | * Note this differs from the RFC 2744-specified prototype in that we don't |
282 | * bother returning an output token, since it would never be used anyway. */ | 297 | * bother returning an output token, since it would never be used anyway. */ |
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 6c97d61baa9b..39b3edc14694 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c | |||
@@ -224,18 +224,13 @@ gss_delete_sec_context_spkm3(void *internal_ctx) { | |||
224 | static u32 | 224 | static u32 |
225 | gss_verify_mic_spkm3(struct gss_ctx *ctx, | 225 | gss_verify_mic_spkm3(struct gss_ctx *ctx, |
226 | struct xdr_buf *signbuf, | 226 | struct xdr_buf *signbuf, |
227 | struct xdr_netobj *checksum, | 227 | struct xdr_netobj *checksum) |
228 | u32 *qstate) { | 228 | { |
229 | u32 maj_stat = 0; | 229 | u32 maj_stat = 0; |
230 | int qop_state = 0; | ||
231 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; | 230 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; |
232 | 231 | ||
233 | dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); | 232 | dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n"); |
234 | maj_stat = spkm3_read_token(sctx, checksum, signbuf, &qop_state, | 233 | maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK); |
235 | SPKM_MIC_TOK); | ||
236 | |||
237 | if (!maj_stat && qop_state) | ||
238 | *qstate = qop_state; | ||
239 | 234 | ||
240 | dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); | 235 | dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat); |
241 | return maj_stat; | 236 | return maj_stat; |
@@ -243,15 +238,15 @@ gss_verify_mic_spkm3(struct gss_ctx *ctx, | |||
243 | 238 | ||
244 | static u32 | 239 | static u32 |
245 | gss_get_mic_spkm3(struct gss_ctx *ctx, | 240 | gss_get_mic_spkm3(struct gss_ctx *ctx, |
246 | u32 qop, | ||
247 | struct xdr_buf *message_buffer, | 241 | struct xdr_buf *message_buffer, |
248 | struct xdr_netobj *message_token) { | 242 | struct xdr_netobj *message_token) |
243 | { | ||
249 | u32 err = 0; | 244 | u32 err = 0; |
250 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; | 245 | struct spkm3_ctx *sctx = ctx->internal_ctx_id; |
251 | 246 | ||
252 | dprintk("RPC: gss_get_mic_spkm3\n"); | 247 | dprintk("RPC: gss_get_mic_spkm3\n"); |
253 | 248 | ||
254 | err = spkm3_make_token(sctx, qop, message_buffer, | 249 | err = spkm3_make_token(sctx, message_buffer, |
255 | message_token, SPKM_MIC_TOK); | 250 | message_token, SPKM_MIC_TOK); |
256 | return err; | 251 | return err; |
257 | } | 252 | } |
@@ -264,8 +259,8 @@ static struct gss_api_ops gss_spkm3_ops = { | |||
264 | }; | 259 | }; |
265 | 260 | ||
266 | static struct pf_desc gss_spkm3_pfs[] = { | 261 | static struct pf_desc gss_spkm3_pfs[] = { |
267 | {RPC_AUTH_GSS_SPKM, 0, RPC_GSS_SVC_NONE, "spkm3"}, | 262 | {RPC_AUTH_GSS_SPKM, RPC_GSS_SVC_NONE, "spkm3"}, |
268 | {RPC_AUTH_GSS_SPKMI, 0, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, | 263 | {RPC_AUTH_GSS_SPKMI, RPC_GSS_SVC_INTEGRITY, "spkm3i"}, |
269 | }; | 264 | }; |
270 | 265 | ||
271 | static struct gss_api_mech gss_spkm3_mech = { | 266 | static struct gss_api_mech gss_spkm3_mech = { |
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c index 25339868d462..148201e929d0 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_seal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c | |||
@@ -51,7 +51,7 @@ | |||
51 | */ | 51 | */ |
52 | 52 | ||
53 | u32 | 53 | u32 |
54 | spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, | 54 | spkm3_make_token(struct spkm3_ctx *ctx, |
55 | struct xdr_buf * text, struct xdr_netobj * token, | 55 | struct xdr_buf * text, struct xdr_netobj * token, |
56 | int toktype) | 56 | int toktype) |
57 | { | 57 | { |
@@ -68,8 +68,6 @@ spkm3_make_token(struct spkm3_ctx *ctx, int qop_req, | |||
68 | dprintk("RPC: spkm3_make_token\n"); | 68 | dprintk("RPC: spkm3_make_token\n"); |
69 | 69 | ||
70 | now = jiffies; | 70 | now = jiffies; |
71 | if (qop_req != 0) | ||
72 | goto out_err; | ||
73 | 71 | ||
74 | if (ctx->ctx_id.len != 16) { | 72 | if (ctx->ctx_id.len != 16) { |
75 | dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", | 73 | dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n", |
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c index 65ce81bf0bc4..c3c0d9586103 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c +++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c | |||
@@ -52,7 +52,7 @@ u32 | |||
52 | spkm3_read_token(struct spkm3_ctx *ctx, | 52 | spkm3_read_token(struct spkm3_ctx *ctx, |
53 | struct xdr_netobj *read_token, /* checksum */ | 53 | struct xdr_netobj *read_token, /* checksum */ |
54 | struct xdr_buf *message_buffer, /* signbuf */ | 54 | struct xdr_buf *message_buffer, /* signbuf */ |
55 | int *qop_state, int toktype) | 55 | int toktype) |
56 | { | 56 | { |
57 | s32 code; | 57 | s32 code; |
58 | struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; | 58 | struct xdr_netobj wire_cksum = {.len =0, .data = NULL}; |
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index e3308195374e..e4ada15ed856 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c | |||
@@ -566,8 +566,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, | |||
566 | 566 | ||
567 | if (rqstp->rq_deferred) /* skip verification of revisited request */ | 567 | if (rqstp->rq_deferred) /* skip verification of revisited request */ |
568 | return SVC_OK; | 568 | return SVC_OK; |
569 | if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) | 569 | if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) { |
570 | != GSS_S_COMPLETE) { | ||
571 | *authp = rpcsec_gsserr_credproblem; | 570 | *authp = rpcsec_gsserr_credproblem; |
572 | return SVC_DENIED; | 571 | return SVC_DENIED; |
573 | } | 572 | } |
@@ -604,7 +603,7 @@ gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) | |||
604 | xdr_buf_from_iov(&iov, &verf_data); | 603 | xdr_buf_from_iov(&iov, &verf_data); |
605 | p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; | 604 | p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; |
606 | mic.data = (u8 *)(p + 1); | 605 | mic.data = (u8 *)(p + 1); |
607 | maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); | 606 | maj_stat = gss_get_mic(ctx_id, &verf_data, &mic); |
608 | if (maj_stat != GSS_S_COMPLETE) | 607 | if (maj_stat != GSS_S_COMPLETE) |
609 | return -1; | 608 | return -1; |
610 | *p++ = htonl(mic.len); | 609 | *p++ = htonl(mic.len); |
@@ -710,7 +709,7 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) | |||
710 | goto out; | 709 | goto out; |
711 | if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) | 710 | if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) |
712 | goto out; | 711 | goto out; |
713 | maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); | 712 | maj_stat = gss_verify_mic(ctx, &integ_buf, &mic); |
714 | if (maj_stat != GSS_S_COMPLETE) | 713 | if (maj_stat != GSS_S_COMPLETE) |
715 | goto out; | 714 | goto out; |
716 | if (ntohl(svc_getu32(&buf->head[0])) != seq) | 715 | if (ntohl(svc_getu32(&buf->head[0])) != seq) |
@@ -1012,7 +1011,7 @@ svcauth_gss_release(struct svc_rqst *rqstp) | |||
1012 | resv = &resbuf->tail[0]; | 1011 | resv = &resbuf->tail[0]; |
1013 | } | 1012 | } |
1014 | mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; | 1013 | mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; |
1015 | if (gss_get_mic(gsd->rsci->mechctx, 0, &integ_buf, &mic)) | 1014 | if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic)) |
1016 | goto out_err; | 1015 | goto out_err; |
1017 | svc_putu32(resv, htonl(mic.len)); | 1016 | svc_putu32(resv, htonl(mic.len)); |
1018 | memset(mic.data + mic.len, 0, | 1017 | memset(mic.data + mic.len, 0, |
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 9b72d3abf823..f56767aaa927 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c | |||
@@ -7,9 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/socket.h> | ||
11 | #include <linux/module.h> | 10 | #include <linux/module.h> |
12 | #include <linux/in.h> | ||
13 | #include <linux/utsname.h> | 11 | #include <linux/utsname.h> |
14 | #include <linux/sunrpc/clnt.h> | 12 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 4ff297a9b15b..890fb5ea0dcb 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c | |||
@@ -9,8 +9,6 @@ | |||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/socket.h> | ||
13 | #include <linux/in.h> | ||
14 | #include <linux/sunrpc/clnt.h> | 12 | #include <linux/sunrpc/clnt.h> |
15 | #include <linux/sunrpc/auth.h> | 13 | #include <linux/sunrpc/auth.h> |
16 | 14 | ||
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f17e6153b688..702ede309b06 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * linux/net/sunrpc/rpcclnt.c | 2 | * linux/net/sunrpc/clnt.c |
3 | * | 3 | * |
4 | * This file contains the high-level RPC interface. | 4 | * This file contains the high-level RPC interface. |
5 | * It is modeled as a finite state machine to support both synchronous | 5 | * It is modeled as a finite state machine to support both synchronous |
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/in.h> | ||
31 | #include <linux/utsname.h> | 30 | #include <linux/utsname.h> |
32 | 31 | ||
33 | #include <linux/sunrpc/clnt.h> | 32 | #include <linux/sunrpc/clnt.h> |
@@ -53,6 +52,7 @@ static void call_allocate(struct rpc_task *task); | |||
53 | static void call_encode(struct rpc_task *task); | 52 | static void call_encode(struct rpc_task *task); |
54 | static void call_decode(struct rpc_task *task); | 53 | static void call_decode(struct rpc_task *task); |
55 | static void call_bind(struct rpc_task *task); | 54 | static void call_bind(struct rpc_task *task); |
55 | static void call_bind_status(struct rpc_task *task); | ||
56 | static void call_transmit(struct rpc_task *task); | 56 | static void call_transmit(struct rpc_task *task); |
57 | static void call_status(struct rpc_task *task); | 57 | static void call_status(struct rpc_task *task); |
58 | static void call_refresh(struct rpc_task *task); | 58 | static void call_refresh(struct rpc_task *task); |
@@ -517,15 +517,8 @@ void | |||
517 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) | 517 | rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) |
518 | { | 518 | { |
519 | struct rpc_xprt *xprt = clnt->cl_xprt; | 519 | struct rpc_xprt *xprt = clnt->cl_xprt; |
520 | 520 | if (xprt->ops->set_buffer_size) | |
521 | xprt->sndsize = 0; | 521 | xprt->ops->set_buffer_size(xprt, sndsize, rcvsize); |
522 | if (sndsize) | ||
523 | xprt->sndsize = sndsize + RPC_SLACK_SPACE; | ||
524 | xprt->rcvsize = 0; | ||
525 | if (rcvsize) | ||
526 | xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; | ||
527 | if (xprt_connected(xprt)) | ||
528 | xprt_sock_setbufsize(xprt); | ||
529 | } | 522 | } |
530 | 523 | ||
531 | /* | 524 | /* |
@@ -685,13 +678,11 @@ call_allocate(struct rpc_task *task) | |||
685 | static void | 678 | static void |
686 | call_encode(struct rpc_task *task) | 679 | call_encode(struct rpc_task *task) |
687 | { | 680 | { |
688 | struct rpc_clnt *clnt = task->tk_client; | ||
689 | struct rpc_rqst *req = task->tk_rqstp; | 681 | struct rpc_rqst *req = task->tk_rqstp; |
690 | struct xdr_buf *sndbuf = &req->rq_snd_buf; | 682 | struct xdr_buf *sndbuf = &req->rq_snd_buf; |
691 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 683 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
692 | unsigned int bufsiz; | 684 | unsigned int bufsiz; |
693 | kxdrproc_t encode; | 685 | kxdrproc_t encode; |
694 | int status; | ||
695 | u32 *p; | 686 | u32 *p; |
696 | 687 | ||
697 | dprintk("RPC: %4d call_encode (status %d)\n", | 688 | dprintk("RPC: %4d call_encode (status %d)\n", |
@@ -719,11 +710,15 @@ call_encode(struct rpc_task *task) | |||
719 | rpc_exit(task, -EIO); | 710 | rpc_exit(task, -EIO); |
720 | return; | 711 | return; |
721 | } | 712 | } |
722 | if (encode && (status = rpcauth_wrap_req(task, encode, req, p, | 713 | if (encode == NULL) |
723 | task->tk_msg.rpc_argp)) < 0) { | 714 | return; |
724 | printk(KERN_WARNING "%s: can't encode arguments: %d\n", | 715 | |
725 | clnt->cl_protname, -status); | 716 | task->tk_status = rpcauth_wrap_req(task, encode, req, p, |
726 | rpc_exit(task, status); | 717 | task->tk_msg.rpc_argp); |
718 | if (task->tk_status == -ENOMEM) { | ||
719 | /* XXX: Is this sane? */ | ||
720 | rpc_delay(task, 3*HZ); | ||
721 | task->tk_status = -EAGAIN; | ||
727 | } | 722 | } |
728 | } | 723 | } |
729 | 724 | ||
@@ -734,43 +729,95 @@ static void | |||
734 | call_bind(struct rpc_task *task) | 729 | call_bind(struct rpc_task *task) |
735 | { | 730 | { |
736 | struct rpc_clnt *clnt = task->tk_client; | 731 | struct rpc_clnt *clnt = task->tk_client; |
737 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
738 | |||
739 | dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid, | ||
740 | xprt, (xprt_connected(xprt) ? "is" : "is not")); | ||
741 | 732 | ||
742 | task->tk_action = (xprt_connected(xprt)) ? call_transmit : call_connect; | 733 | dprintk("RPC: %4d call_bind (status %d)\n", |
734 | task->tk_pid, task->tk_status); | ||
743 | 735 | ||
736 | task->tk_action = call_connect; | ||
744 | if (!clnt->cl_port) { | 737 | if (!clnt->cl_port) { |
745 | task->tk_action = call_connect; | 738 | task->tk_action = call_bind_status; |
746 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | 739 | task->tk_timeout = task->tk_xprt->bind_timeout; |
747 | rpc_getport(task, clnt); | 740 | rpc_getport(task, clnt); |
748 | } | 741 | } |
749 | } | 742 | } |
750 | 743 | ||
751 | /* | 744 | /* |
752 | * 4a. Connect to the RPC server (TCP case) | 745 | * 4a. Sort out bind result |
746 | */ | ||
747 | static void | ||
748 | call_bind_status(struct rpc_task *task) | ||
749 | { | ||
750 | int status = -EACCES; | ||
751 | |||
752 | if (task->tk_status >= 0) { | ||
753 | dprintk("RPC: %4d call_bind_status (status %d)\n", | ||
754 | task->tk_pid, task->tk_status); | ||
755 | task->tk_status = 0; | ||
756 | task->tk_action = call_connect; | ||
757 | return; | ||
758 | } | ||
759 | |||
760 | switch (task->tk_status) { | ||
761 | case -EACCES: | ||
762 | dprintk("RPC: %4d remote rpcbind: RPC program/version unavailable\n", | ||
763 | task->tk_pid); | ||
764 | rpc_delay(task, 3*HZ); | ||
765 | goto retry_bind; | ||
766 | case -ETIMEDOUT: | ||
767 | dprintk("RPC: %4d rpcbind request timed out\n", | ||
768 | task->tk_pid); | ||
769 | if (RPC_IS_SOFT(task)) { | ||
770 | status = -EIO; | ||
771 | break; | ||
772 | } | ||
773 | goto retry_bind; | ||
774 | case -EPFNOSUPPORT: | ||
775 | dprintk("RPC: %4d remote rpcbind service unavailable\n", | ||
776 | task->tk_pid); | ||
777 | break; | ||
778 | case -EPROTONOSUPPORT: | ||
779 | dprintk("RPC: %4d remote rpcbind version 2 unavailable\n", | ||
780 | task->tk_pid); | ||
781 | break; | ||
782 | default: | ||
783 | dprintk("RPC: %4d unrecognized rpcbind error (%d)\n", | ||
784 | task->tk_pid, -task->tk_status); | ||
785 | status = -EIO; | ||
786 | break; | ||
787 | } | ||
788 | |||
789 | rpc_exit(task, status); | ||
790 | return; | ||
791 | |||
792 | retry_bind: | ||
793 | task->tk_status = 0; | ||
794 | task->tk_action = call_bind; | ||
795 | return; | ||
796 | } | ||
797 | |||
798 | /* | ||
799 | * 4b. Connect to the RPC server | ||
753 | */ | 800 | */ |
754 | static void | 801 | static void |
755 | call_connect(struct rpc_task *task) | 802 | call_connect(struct rpc_task *task) |
756 | { | 803 | { |
757 | struct rpc_clnt *clnt = task->tk_client; | 804 | struct rpc_xprt *xprt = task->tk_xprt; |
758 | 805 | ||
759 | dprintk("RPC: %4d call_connect status %d\n", | 806 | dprintk("RPC: %4d call_connect xprt %p %s connected\n", |
760 | task->tk_pid, task->tk_status); | 807 | task->tk_pid, xprt, |
808 | (xprt_connected(xprt) ? "is" : "is not")); | ||
761 | 809 | ||
762 | if (xprt_connected(clnt->cl_xprt)) { | 810 | task->tk_action = call_transmit; |
763 | task->tk_action = call_transmit; | 811 | if (!xprt_connected(xprt)) { |
764 | return; | 812 | task->tk_action = call_connect_status; |
813 | if (task->tk_status < 0) | ||
814 | return; | ||
815 | xprt_connect(task); | ||
765 | } | 816 | } |
766 | task->tk_action = call_connect_status; | ||
767 | if (task->tk_status < 0) | ||
768 | return; | ||
769 | xprt_connect(task); | ||
770 | } | 817 | } |
771 | 818 | ||
772 | /* | 819 | /* |
773 | * 4b. Sort out connect result | 820 | * 4c. Sort out connect result |
774 | */ | 821 | */ |
775 | static void | 822 | static void |
776 | call_connect_status(struct rpc_task *task) | 823 | call_connect_status(struct rpc_task *task) |
@@ -778,6 +825,9 @@ call_connect_status(struct rpc_task *task) | |||
778 | struct rpc_clnt *clnt = task->tk_client; | 825 | struct rpc_clnt *clnt = task->tk_client; |
779 | int status = task->tk_status; | 826 | int status = task->tk_status; |
780 | 827 | ||
828 | dprintk("RPC: %5u call_connect_status (status %d)\n", | ||
829 | task->tk_pid, task->tk_status); | ||
830 | |||
781 | task->tk_status = 0; | 831 | task->tk_status = 0; |
782 | if (status >= 0) { | 832 | if (status >= 0) { |
783 | clnt->cl_stats->netreconn++; | 833 | clnt->cl_stats->netreconn++; |
@@ -785,17 +835,19 @@ call_connect_status(struct rpc_task *task) | |||
785 | return; | 835 | return; |
786 | } | 836 | } |
787 | 837 | ||
788 | /* Something failed: we may have to rebind */ | 838 | /* Something failed: remote service port may have changed */ |
789 | if (clnt->cl_autobind) | 839 | if (clnt->cl_autobind) |
790 | clnt->cl_port = 0; | 840 | clnt->cl_port = 0; |
841 | |||
791 | switch (status) { | 842 | switch (status) { |
792 | case -ENOTCONN: | 843 | case -ENOTCONN: |
793 | case -ETIMEDOUT: | 844 | case -ETIMEDOUT: |
794 | case -EAGAIN: | 845 | case -EAGAIN: |
795 | task->tk_action = (clnt->cl_port == 0) ? call_bind : call_connect; | 846 | task->tk_action = call_bind; |
796 | break; | 847 | break; |
797 | default: | 848 | default: |
798 | rpc_exit(task, -EIO); | 849 | rpc_exit(task, -EIO); |
850 | break; | ||
799 | } | 851 | } |
800 | } | 852 | } |
801 | 853 | ||
@@ -815,10 +867,12 @@ call_transmit(struct rpc_task *task) | |||
815 | if (task->tk_status != 0) | 867 | if (task->tk_status != 0) |
816 | return; | 868 | return; |
817 | /* Encode here so that rpcsec_gss can use correct sequence number. */ | 869 | /* Encode here so that rpcsec_gss can use correct sequence number. */ |
818 | if (!task->tk_rqstp->rq_bytes_sent) | 870 | if (task->tk_rqstp->rq_bytes_sent == 0) { |
819 | call_encode(task); | 871 | call_encode(task); |
820 | if (task->tk_status < 0) | 872 | /* Did the encode result in an error condition? */ |
821 | return; | 873 | if (task->tk_status != 0) |
874 | goto out_nosend; | ||
875 | } | ||
822 | xprt_transmit(task); | 876 | xprt_transmit(task); |
823 | if (task->tk_status < 0) | 877 | if (task->tk_status < 0) |
824 | return; | 878 | return; |
@@ -826,6 +880,10 @@ call_transmit(struct rpc_task *task) | |||
826 | task->tk_action = NULL; | 880 | task->tk_action = NULL; |
827 | rpc_wake_up_task(task); | 881 | rpc_wake_up_task(task); |
828 | } | 882 | } |
883 | return; | ||
884 | out_nosend: | ||
885 | /* release socket write lock before attempting to handle error */ | ||
886 | xprt_abort_transmit(task); | ||
829 | } | 887 | } |
830 | 888 | ||
831 | /* | 889 | /* |
@@ -1020,13 +1078,12 @@ static u32 * | |||
1020 | call_header(struct rpc_task *task) | 1078 | call_header(struct rpc_task *task) |
1021 | { | 1079 | { |
1022 | struct rpc_clnt *clnt = task->tk_client; | 1080 | struct rpc_clnt *clnt = task->tk_client; |
1023 | struct rpc_xprt *xprt = clnt->cl_xprt; | ||
1024 | struct rpc_rqst *req = task->tk_rqstp; | 1081 | struct rpc_rqst *req = task->tk_rqstp; |
1025 | u32 *p = req->rq_svec[0].iov_base; | 1082 | u32 *p = req->rq_svec[0].iov_base; |
1026 | 1083 | ||
1027 | /* FIXME: check buffer size? */ | 1084 | /* FIXME: check buffer size? */ |
1028 | if (xprt->stream) | 1085 | |
1029 | *p++ = 0; /* fill in later */ | 1086 | p = xprt_skip_transport_header(task->tk_xprt, p); |
1030 | *p++ = req->rq_xid; /* XID */ | 1087 | *p++ = req->rq_xid; /* XID */ |
1031 | *p++ = htonl(RPC_CALL); /* CALL */ | 1088 | *p++ = htonl(RPC_CALL); /* CALL */ |
1032 | *p++ = htonl(RPC_VERSION); /* RPC version */ | 1089 | *p++ = htonl(RPC_VERSION); /* RPC version */ |
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c index 4e81f2766923..a398575f94b8 100644 --- a/net/sunrpc/pmap_clnt.c +++ b/net/sunrpc/pmap_clnt.c | |||
@@ -26,7 +26,7 @@ | |||
26 | #define PMAP_GETPORT 3 | 26 | #define PMAP_GETPORT 3 |
27 | 27 | ||
28 | static struct rpc_procinfo pmap_procedures[]; | 28 | static struct rpc_procinfo pmap_procedures[]; |
29 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int); | 29 | static struct rpc_clnt * pmap_create(char *, struct sockaddr_in *, int, int); |
30 | static void pmap_getport_done(struct rpc_task *); | 30 | static void pmap_getport_done(struct rpc_task *); |
31 | static struct rpc_program pmap_program; | 31 | static struct rpc_program pmap_program; |
32 | static DEFINE_SPINLOCK(pmap_lock); | 32 | static DEFINE_SPINLOCK(pmap_lock); |
@@ -65,7 +65,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) | |||
65 | map->pm_binding = 1; | 65 | map->pm_binding = 1; |
66 | spin_unlock(&pmap_lock); | 66 | spin_unlock(&pmap_lock); |
67 | 67 | ||
68 | pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot); | 68 | pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot, 0); |
69 | if (IS_ERR(pmap_clnt)) { | 69 | if (IS_ERR(pmap_clnt)) { |
70 | task->tk_status = PTR_ERR(pmap_clnt); | 70 | task->tk_status = PTR_ERR(pmap_clnt); |
71 | goto bailout; | 71 | goto bailout; |
@@ -112,7 +112,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot) | |||
112 | NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); | 112 | NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot); |
113 | 113 | ||
114 | sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); | 114 | sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(sin->sin_addr.s_addr)); |
115 | pmap_clnt = pmap_create(hostname, sin, prot); | 115 | pmap_clnt = pmap_create(hostname, sin, prot, 0); |
116 | if (IS_ERR(pmap_clnt)) | 116 | if (IS_ERR(pmap_clnt)) |
117 | return PTR_ERR(pmap_clnt); | 117 | return PTR_ERR(pmap_clnt); |
118 | 118 | ||
@@ -171,7 +171,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
171 | 171 | ||
172 | sin.sin_family = AF_INET; | 172 | sin.sin_family = AF_INET; |
173 | sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); | 173 | sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK); |
174 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP); | 174 | pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1); |
175 | if (IS_ERR(pmap_clnt)) { | 175 | if (IS_ERR(pmap_clnt)) { |
176 | error = PTR_ERR(pmap_clnt); | 176 | error = PTR_ERR(pmap_clnt); |
177 | dprintk("RPC: couldn't create pmap client. Error = %d\n", error); | 177 | dprintk("RPC: couldn't create pmap client. Error = %d\n", error); |
@@ -198,7 +198,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) | |||
198 | } | 198 | } |
199 | 199 | ||
200 | static struct rpc_clnt * | 200 | static struct rpc_clnt * |
201 | pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) | 201 | pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileged) |
202 | { | 202 | { |
203 | struct rpc_xprt *xprt; | 203 | struct rpc_xprt *xprt; |
204 | struct rpc_clnt *clnt; | 204 | struct rpc_clnt *clnt; |
@@ -208,6 +208,8 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto) | |||
208 | if (IS_ERR(xprt)) | 208 | if (IS_ERR(xprt)) |
209 | return (struct rpc_clnt *)xprt; | 209 | return (struct rpc_clnt *)xprt; |
210 | xprt->addr.sin_port = htons(RPC_PMAP_PORT); | 210 | xprt->addr.sin_port = htons(RPC_PMAP_PORT); |
211 | if (!privileged) | ||
212 | xprt->resvport = 0; | ||
211 | 213 | ||
212 | /* printk("pmap: create clnt\n"); */ | 214 | /* printk("pmap: create clnt\n"); */ |
213 | clnt = rpc_new_client(xprt, hostname, | 215 | clnt = rpc_new_client(xprt, hostname, |
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ded6c63f11ec..4f188d0a5d11 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c | |||
@@ -76,25 +76,35 @@ int | |||
76 | rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) | 76 | rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) |
77 | { | 77 | { |
78 | struct rpc_inode *rpci = RPC_I(inode); | 78 | struct rpc_inode *rpci = RPC_I(inode); |
79 | int res = 0; | 79 | int res = -EPIPE; |
80 | 80 | ||
81 | down(&inode->i_sem); | 81 | down(&inode->i_sem); |
82 | if (rpci->ops == NULL) | ||
83 | goto out; | ||
82 | if (rpci->nreaders) { | 84 | if (rpci->nreaders) { |
83 | list_add_tail(&msg->list, &rpci->pipe); | 85 | list_add_tail(&msg->list, &rpci->pipe); |
84 | rpci->pipelen += msg->len; | 86 | rpci->pipelen += msg->len; |
87 | res = 0; | ||
85 | } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { | 88 | } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { |
86 | if (list_empty(&rpci->pipe)) | 89 | if (list_empty(&rpci->pipe)) |
87 | schedule_delayed_work(&rpci->queue_timeout, | 90 | schedule_delayed_work(&rpci->queue_timeout, |
88 | RPC_UPCALL_TIMEOUT); | 91 | RPC_UPCALL_TIMEOUT); |
89 | list_add_tail(&msg->list, &rpci->pipe); | 92 | list_add_tail(&msg->list, &rpci->pipe); |
90 | rpci->pipelen += msg->len; | 93 | rpci->pipelen += msg->len; |
91 | } else | 94 | res = 0; |
92 | res = -EPIPE; | 95 | } |
96 | out: | ||
93 | up(&inode->i_sem); | 97 | up(&inode->i_sem); |
94 | wake_up(&rpci->waitq); | 98 | wake_up(&rpci->waitq); |
95 | return res; | 99 | return res; |
96 | } | 100 | } |
97 | 101 | ||
102 | static inline void | ||
103 | rpc_inode_setowner(struct inode *inode, void *private) | ||
104 | { | ||
105 | RPC_I(inode)->private = private; | ||
106 | } | ||
107 | |||
98 | static void | 108 | static void |
99 | rpc_close_pipes(struct inode *inode) | 109 | rpc_close_pipes(struct inode *inode) |
100 | { | 110 | { |
@@ -111,15 +121,10 @@ rpc_close_pipes(struct inode *inode) | |||
111 | rpci->ops->release_pipe(inode); | 121 | rpci->ops->release_pipe(inode); |
112 | rpci->ops = NULL; | 122 | rpci->ops = NULL; |
113 | } | 123 | } |
124 | rpc_inode_setowner(inode, NULL); | ||
114 | up(&inode->i_sem); | 125 | up(&inode->i_sem); |
115 | } | 126 | } |
116 | 127 | ||
117 | static inline void | ||
118 | rpc_inode_setowner(struct inode *inode, void *private) | ||
119 | { | ||
120 | RPC_I(inode)->private = private; | ||
121 | } | ||
122 | |||
123 | static struct inode * | 128 | static struct inode * |
124 | rpc_alloc_inode(struct super_block *sb) | 129 | rpc_alloc_inode(struct super_block *sb) |
125 | { | 130 | { |
@@ -501,7 +506,6 @@ repeat: | |||
501 | dentry = dvec[--n]; | 506 | dentry = dvec[--n]; |
502 | if (dentry->d_inode) { | 507 | if (dentry->d_inode) { |
503 | rpc_close_pipes(dentry->d_inode); | 508 | rpc_close_pipes(dentry->d_inode); |
504 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
505 | simple_unlink(dir, dentry); | 509 | simple_unlink(dir, dentry); |
506 | } | 510 | } |
507 | dput(dentry); | 511 | dput(dentry); |
@@ -576,10 +580,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) | |||
576 | int error; | 580 | int error; |
577 | 581 | ||
578 | shrink_dcache_parent(dentry); | 582 | shrink_dcache_parent(dentry); |
579 | if (dentry->d_inode) { | 583 | if (dentry->d_inode) |
580 | rpc_close_pipes(dentry->d_inode); | 584 | rpc_close_pipes(dentry->d_inode); |
581 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
582 | } | ||
583 | if ((error = simple_rmdir(dir, dentry)) != 0) | 585 | if ((error = simple_rmdir(dir, dentry)) != 0) |
584 | return error; | 586 | return error; |
585 | if (!error) { | 587 | if (!error) { |
@@ -732,7 +734,6 @@ rpc_unlink(char *path) | |||
732 | d_drop(dentry); | 734 | d_drop(dentry); |
733 | if (dentry->d_inode) { | 735 | if (dentry->d_inode) { |
734 | rpc_close_pipes(dentry->d_inode); | 736 | rpc_close_pipes(dentry->d_inode); |
735 | rpc_inode_setowner(dentry->d_inode, NULL); | ||
736 | error = simple_unlink(dir, dentry); | 737 | error = simple_unlink(dir, dentry); |
737 | } | 738 | } |
738 | dput(dentry); | 739 | dput(dentry); |
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c new file mode 100644 index 000000000000..8f97e90f36c8 --- /dev/null +++ b/net/sunrpc/socklib.c | |||
@@ -0,0 +1,175 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/socklib.c | ||
3 | * | ||
4 | * Common socket helper routines for RPC client and server | ||
5 | * | ||
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | ||
7 | */ | ||
8 | |||
9 | #include <linux/types.h> | ||
10 | #include <linux/pagemap.h> | ||
11 | #include <linux/udp.h> | ||
12 | #include <linux/sunrpc/xdr.h> | ||
13 | |||
14 | |||
15 | /** | ||
16 | * skb_read_bits - copy some data bits from skb to internal buffer | ||
17 | * @desc: sk_buff copy helper | ||
18 | * @to: copy destination | ||
19 | * @len: number of bytes to copy | ||
20 | * | ||
21 | * Possibly called several times to iterate over an sk_buff and copy | ||
22 | * data out of it. | ||
23 | */ | ||
24 | static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len) | ||
25 | { | ||
26 | if (len > desc->count) | ||
27 | len = desc->count; | ||
28 | if (skb_copy_bits(desc->skb, desc->offset, to, len)) | ||
29 | return 0; | ||
30 | desc->count -= len; | ||
31 | desc->offset += len; | ||
32 | return len; | ||
33 | } | ||
34 | |||
35 | /** | ||
36 | * skb_read_and_csum_bits - copy and checksum from skb to buffer | ||
37 | * @desc: sk_buff copy helper | ||
38 | * @to: copy destination | ||
39 | * @len: number of bytes to copy | ||
40 | * | ||
41 | * Same as skb_read_bits, but calculate a checksum at the same time. | ||
42 | */ | ||
43 | static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) | ||
44 | { | ||
45 | unsigned int csum2, pos; | ||
46 | |||
47 | if (len > desc->count) | ||
48 | len = desc->count; | ||
49 | pos = desc->offset; | ||
50 | csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); | ||
51 | desc->csum = csum_block_add(desc->csum, csum2, pos); | ||
52 | desc->count -= len; | ||
53 | desc->offset += len; | ||
54 | return len; | ||
55 | } | ||
56 | |||
57 | /** | ||
58 | * xdr_partial_copy_from_skb - copy data out of an skb | ||
59 | * @xdr: target XDR buffer | ||
60 | * @base: starting offset | ||
61 | * @desc: sk_buff copy helper | ||
62 | * @copy_actor: virtual method for copying data | ||
63 | * | ||
64 | */ | ||
65 | ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) | ||
66 | { | ||
67 | struct page **ppage = xdr->pages; | ||
68 | unsigned int len, pglen = xdr->page_len; | ||
69 | ssize_t copied = 0; | ||
70 | int ret; | ||
71 | |||
72 | len = xdr->head[0].iov_len; | ||
73 | if (base < len) { | ||
74 | len -= base; | ||
75 | ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); | ||
76 | copied += ret; | ||
77 | if (ret != len || !desc->count) | ||
78 | goto out; | ||
79 | base = 0; | ||
80 | } else | ||
81 | base -= len; | ||
82 | |||
83 | if (unlikely(pglen == 0)) | ||
84 | goto copy_tail; | ||
85 | if (unlikely(base >= pglen)) { | ||
86 | base -= pglen; | ||
87 | goto copy_tail; | ||
88 | } | ||
89 | if (base || xdr->page_base) { | ||
90 | pglen -= base; | ||
91 | base += xdr->page_base; | ||
92 | ppage += base >> PAGE_CACHE_SHIFT; | ||
93 | base &= ~PAGE_CACHE_MASK; | ||
94 | } | ||
95 | do { | ||
96 | char *kaddr; | ||
97 | |||
98 | /* ACL likes to be lazy in allocating pages - ACLs | ||
99 | * are small by default but can get huge. */ | ||
100 | if (unlikely(*ppage == NULL)) { | ||
101 | *ppage = alloc_page(GFP_ATOMIC); | ||
102 | if (unlikely(*ppage == NULL)) { | ||
103 | if (copied == 0) | ||
104 | copied = -ENOMEM; | ||
105 | goto out; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | len = PAGE_CACHE_SIZE; | ||
110 | kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); | ||
111 | if (base) { | ||
112 | len -= base; | ||
113 | if (pglen < len) | ||
114 | len = pglen; | ||
115 | ret = copy_actor(desc, kaddr + base, len); | ||
116 | base = 0; | ||
117 | } else { | ||
118 | if (pglen < len) | ||
119 | len = pglen; | ||
120 | ret = copy_actor(desc, kaddr, len); | ||
121 | } | ||
122 | flush_dcache_page(*ppage); | ||
123 | kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); | ||
124 | copied += ret; | ||
125 | if (ret != len || !desc->count) | ||
126 | goto out; | ||
127 | ppage++; | ||
128 | } while ((pglen -= len) != 0); | ||
129 | copy_tail: | ||
130 | len = xdr->tail[0].iov_len; | ||
131 | if (base < len) | ||
132 | copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); | ||
133 | out: | ||
134 | return copied; | ||
135 | } | ||
136 | |||
137 | /** | ||
138 | * csum_partial_copy_to_xdr - checksum and copy data | ||
139 | * @xdr: target XDR buffer | ||
140 | * @skb: source skb | ||
141 | * | ||
142 | * We have set things up such that we perform the checksum of the UDP | ||
143 | * packet in parallel with the copies into the RPC client iovec. -DaveM | ||
144 | */ | ||
145 | int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | ||
146 | { | ||
147 | skb_reader_t desc; | ||
148 | |||
149 | desc.skb = skb; | ||
150 | desc.offset = sizeof(struct udphdr); | ||
151 | desc.count = skb->len - desc.offset; | ||
152 | |||
153 | if (skb->ip_summed == CHECKSUM_UNNECESSARY) | ||
154 | goto no_checksum; | ||
155 | |||
156 | desc.csum = csum_partial(skb->data, desc.offset, skb->csum); | ||
157 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) | ||
158 | return -1; | ||
159 | if (desc.offset != skb->len) { | ||
160 | unsigned int csum2; | ||
161 | csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); | ||
162 | desc.csum = csum_block_add(desc.csum, csum2, desc.offset); | ||
163 | } | ||
164 | if (desc.count) | ||
165 | return -1; | ||
166 | if ((unsigned short)csum_fold(desc.csum)) | ||
167 | return -1; | ||
168 | return 0; | ||
169 | no_checksum: | ||
170 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) | ||
171 | return -1; | ||
172 | if (desc.count) | ||
173 | return -1; | ||
174 | return 0; | ||
175 | } | ||
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index ed48ff022d35..2387e7b823ff 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | 11 | ||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/socket.h> | ||
14 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
15 | #include <linux/uio.h> | 14 | #include <linux/uio.h> |
16 | #include <linux/unistd.h> | 15 | #include <linux/unistd.h> |
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 691dea4a58e7..f16e7cdd6150 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c | |||
@@ -548,9 +548,6 @@ svc_write_space(struct sock *sk) | |||
548 | /* | 548 | /* |
549 | * Receive a datagram from a UDP socket. | 549 | * Receive a datagram from a UDP socket. |
550 | */ | 550 | */ |
551 | extern int | ||
552 | csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb); | ||
553 | |||
554 | static int | 551 | static int |
555 | svc_udp_recvfrom(struct svc_rqst *rqstp) | 552 | svc_udp_recvfrom(struct svc_rqst *rqstp) |
556 | { | 553 | { |
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index 1b9616a12e24..d0c9f460e411 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c | |||
@@ -119,8 +119,18 @@ done: | |||
119 | return 0; | 119 | return 0; |
120 | } | 120 | } |
121 | 121 | ||
122 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
123 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
124 | unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; | ||
125 | EXPORT_SYMBOL(xprt_min_resvport); | ||
126 | unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; | ||
127 | EXPORT_SYMBOL(xprt_max_resvport); | ||
128 | |||
129 | |||
122 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; | 130 | static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; |
123 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; | 131 | static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; |
132 | static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; | ||
133 | static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; | ||
124 | 134 | ||
125 | static ctl_table debug_table[] = { | 135 | static ctl_table debug_table[] = { |
126 | { | 136 | { |
@@ -177,6 +187,28 @@ static ctl_table debug_table[] = { | |||
177 | .extra1 = &min_slot_table_size, | 187 | .extra1 = &min_slot_table_size, |
178 | .extra2 = &max_slot_table_size | 188 | .extra2 = &max_slot_table_size |
179 | }, | 189 | }, |
190 | { | ||
191 | .ctl_name = CTL_MIN_RESVPORT, | ||
192 | .procname = "min_resvport", | ||
193 | .data = &xprt_min_resvport, | ||
194 | .maxlen = sizeof(unsigned int), | ||
195 | .mode = 0644, | ||
196 | .proc_handler = &proc_dointvec_minmax, | ||
197 | .strategy = &sysctl_intvec, | ||
198 | .extra1 = &xprt_min_resvport_limit, | ||
199 | .extra2 = &xprt_max_resvport_limit | ||
200 | }, | ||
201 | { | ||
202 | .ctl_name = CTL_MAX_RESVPORT, | ||
203 | .procname = "max_resvport", | ||
204 | .data = &xprt_max_resvport, | ||
205 | .maxlen = sizeof(unsigned int), | ||
206 | .mode = 0644, | ||
207 | .proc_handler = &proc_dointvec_minmax, | ||
208 | .strategy = &sysctl_intvec, | ||
209 | .extra1 = &xprt_min_resvport_limit, | ||
210 | .extra2 = &xprt_max_resvport_limit | ||
211 | }, | ||
180 | { .ctl_name = 0 } | 212 | { .ctl_name = 0 } |
181 | }; | 213 | }; |
182 | 214 | ||
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index fde16f40a581..32df43372ee9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c | |||
@@ -6,15 +6,12 @@ | |||
6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> | 6 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/module.h> | ||
9 | #include <linux/types.h> | 10 | #include <linux/types.h> |
10 | #include <linux/socket.h> | ||
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
15 | #include <linux/in.h> | ||
16 | #include <linux/net.h> | ||
17 | #include <net/sock.h> | ||
18 | #include <linux/sunrpc/xdr.h> | 15 | #include <linux/sunrpc/xdr.h> |
19 | #include <linux/sunrpc/msg_prot.h> | 16 | #include <linux/sunrpc/msg_prot.h> |
20 | 17 | ||
@@ -176,178 +173,6 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset, | |||
176 | xdr->buflen += len; | 173 | xdr->buflen += len; |
177 | } | 174 | } |
178 | 175 | ||
179 | ssize_t | ||
180 | xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, | ||
181 | skb_reader_t *desc, | ||
182 | skb_read_actor_t copy_actor) | ||
183 | { | ||
184 | struct page **ppage = xdr->pages; | ||
185 | unsigned int len, pglen = xdr->page_len; | ||
186 | ssize_t copied = 0; | ||
187 | int ret; | ||
188 | |||
189 | len = xdr->head[0].iov_len; | ||
190 | if (base < len) { | ||
191 | len -= base; | ||
192 | ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); | ||
193 | copied += ret; | ||
194 | if (ret != len || !desc->count) | ||
195 | goto out; | ||
196 | base = 0; | ||
197 | } else | ||
198 | base -= len; | ||
199 | |||
200 | if (pglen == 0) | ||
201 | goto copy_tail; | ||
202 | if (base >= pglen) { | ||
203 | base -= pglen; | ||
204 | goto copy_tail; | ||
205 | } | ||
206 | if (base || xdr->page_base) { | ||
207 | pglen -= base; | ||
208 | base += xdr->page_base; | ||
209 | ppage += base >> PAGE_CACHE_SHIFT; | ||
210 | base &= ~PAGE_CACHE_MASK; | ||
211 | } | ||
212 | do { | ||
213 | char *kaddr; | ||
214 | |||
215 | /* ACL likes to be lazy in allocating pages - ACLs | ||
216 | * are small by default but can get huge. */ | ||
217 | if (unlikely(*ppage == NULL)) { | ||
218 | *ppage = alloc_page(GFP_ATOMIC); | ||
219 | if (unlikely(*ppage == NULL)) { | ||
220 | if (copied == 0) | ||
221 | copied = -ENOMEM; | ||
222 | goto out; | ||
223 | } | ||
224 | } | ||
225 | |||
226 | len = PAGE_CACHE_SIZE; | ||
227 | kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); | ||
228 | if (base) { | ||
229 | len -= base; | ||
230 | if (pglen < len) | ||
231 | len = pglen; | ||
232 | ret = copy_actor(desc, kaddr + base, len); | ||
233 | base = 0; | ||
234 | } else { | ||
235 | if (pglen < len) | ||
236 | len = pglen; | ||
237 | ret = copy_actor(desc, kaddr, len); | ||
238 | } | ||
239 | flush_dcache_page(*ppage); | ||
240 | kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); | ||
241 | copied += ret; | ||
242 | if (ret != len || !desc->count) | ||
243 | goto out; | ||
244 | ppage++; | ||
245 | } while ((pglen -= len) != 0); | ||
246 | copy_tail: | ||
247 | len = xdr->tail[0].iov_len; | ||
248 | if (base < len) | ||
249 | copied += copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); | ||
250 | out: | ||
251 | return copied; | ||
252 | } | ||
253 | |||
254 | |||
255 | int | ||
256 | xdr_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, | ||
257 | struct xdr_buf *xdr, unsigned int base, int msgflags) | ||
258 | { | ||
259 | struct page **ppage = xdr->pages; | ||
260 | unsigned int len, pglen = xdr->page_len; | ||
261 | int err, ret = 0; | ||
262 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
263 | |||
264 | len = xdr->head[0].iov_len; | ||
265 | if (base < len || (addr != NULL && base == 0)) { | ||
266 | struct kvec iov = { | ||
267 | .iov_base = xdr->head[0].iov_base + base, | ||
268 | .iov_len = len - base, | ||
269 | }; | ||
270 | struct msghdr msg = { | ||
271 | .msg_name = addr, | ||
272 | .msg_namelen = addrlen, | ||
273 | .msg_flags = msgflags, | ||
274 | }; | ||
275 | if (xdr->len > len) | ||
276 | msg.msg_flags |= MSG_MORE; | ||
277 | |||
278 | if (iov.iov_len != 0) | ||
279 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
280 | else | ||
281 | err = kernel_sendmsg(sock, &msg, NULL, 0, 0); | ||
282 | if (ret == 0) | ||
283 | ret = err; | ||
284 | else if (err > 0) | ||
285 | ret += err; | ||
286 | if (err != iov.iov_len) | ||
287 | goto out; | ||
288 | base = 0; | ||
289 | } else | ||
290 | base -= len; | ||
291 | |||
292 | if (pglen == 0) | ||
293 | goto copy_tail; | ||
294 | if (base >= pglen) { | ||
295 | base -= pglen; | ||
296 | goto copy_tail; | ||
297 | } | ||
298 | if (base || xdr->page_base) { | ||
299 | pglen -= base; | ||
300 | base += xdr->page_base; | ||
301 | ppage += base >> PAGE_CACHE_SHIFT; | ||
302 | base &= ~PAGE_CACHE_MASK; | ||
303 | } | ||
304 | |||
305 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
306 | do { | ||
307 | int flags = msgflags; | ||
308 | |||
309 | len = PAGE_CACHE_SIZE; | ||
310 | if (base) | ||
311 | len -= base; | ||
312 | if (pglen < len) | ||
313 | len = pglen; | ||
314 | |||
315 | if (pglen != len || xdr->tail[0].iov_len != 0) | ||
316 | flags |= MSG_MORE; | ||
317 | |||
318 | /* Hmm... We might be dealing with highmem pages */ | ||
319 | if (PageHighMem(*ppage)) | ||
320 | sendpage = sock_no_sendpage; | ||
321 | err = sendpage(sock, *ppage, base, len, flags); | ||
322 | if (ret == 0) | ||
323 | ret = err; | ||
324 | else if (err > 0) | ||
325 | ret += err; | ||
326 | if (err != len) | ||
327 | goto out; | ||
328 | base = 0; | ||
329 | ppage++; | ||
330 | } while ((pglen -= len) != 0); | ||
331 | copy_tail: | ||
332 | len = xdr->tail[0].iov_len; | ||
333 | if (base < len) { | ||
334 | struct kvec iov = { | ||
335 | .iov_base = xdr->tail[0].iov_base + base, | ||
336 | .iov_len = len - base, | ||
337 | }; | ||
338 | struct msghdr msg = { | ||
339 | .msg_flags = msgflags, | ||
340 | }; | ||
341 | err = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
342 | if (ret == 0) | ||
343 | ret = err; | ||
344 | else if (err > 0) | ||
345 | ret += err; | ||
346 | } | ||
347 | out: | ||
348 | return ret; | ||
349 | } | ||
350 | |||
351 | 176 | ||
352 | /* | 177 | /* |
353 | * Helper routines for doing 'memmove' like operations on a struct xdr_buf | 178 | * Helper routines for doing 'memmove' like operations on a struct xdr_buf |
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3c654e06b084..6dda3860351f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c | |||
@@ -10,12 +10,12 @@ | |||
10 | * one is available. Otherwise, it sleeps on the backlog queue | 10 | * one is available. Otherwise, it sleeps on the backlog queue |
11 | * (xprt_reserve). | 11 | * (xprt_reserve). |
12 | * - Next, the caller puts together the RPC message, stuffs it into | 12 | * - Next, the caller puts together the RPC message, stuffs it into |
13 | * the request struct, and calls xprt_call(). | 13 | * the request struct, and calls xprt_transmit(). |
14 | * - xprt_call transmits the message and installs the caller on the | 14 | * - xprt_transmit sends the message and installs the caller on the |
15 | * socket's wait list. At the same time, it installs a timer that | 15 | * transport's wait list. At the same time, it installs a timer that |
16 | * is run after the packet's timeout has expired. | 16 | * is run after the packet's timeout has expired. |
17 | * - When a packet arrives, the data_ready handler walks the list of | 17 | * - When a packet arrives, the data_ready handler walks the list of |
18 | * pending requests for that socket. If a matching XID is found, the | 18 | * pending requests for that transport. If a matching XID is found, the |
19 | * caller is woken up, and the timer removed. | 19 | * caller is woken up, and the timer removed. |
20 | * - When no reply arrives within the timeout interval, the timer is | 20 | * - When no reply arrives within the timeout interval, the timer is |
21 | * fired by the kernel and runs xprt_timer(). It either adjusts the | 21 | * fired by the kernel and runs xprt_timer(). It either adjusts the |
@@ -33,36 +33,17 @@ | |||
33 | * | 33 | * |
34 | * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> | 34 | * Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de> |
35 | * | 35 | * |
36 | * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> | 36 | * Transport switch API copyright (C) 2005, Chuck Lever <cel@netapp.com> |
37 | * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
38 | * TCP NFS related read + write fixes | ||
39 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
40 | * | ||
41 | * Rewrite of larges part of the code in order to stabilize TCP stuff. | ||
42 | * Fix behaviour when socket buffer is full. | ||
43 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
44 | */ | 37 | */ |
45 | 38 | ||
39 | #include <linux/module.h> | ||
40 | |||
46 | #include <linux/types.h> | 41 | #include <linux/types.h> |
47 | #include <linux/slab.h> | 42 | #include <linux/interrupt.h> |
48 | #include <linux/capability.h> | ||
49 | #include <linux/sched.h> | ||
50 | #include <linux/errno.h> | ||
51 | #include <linux/socket.h> | ||
52 | #include <linux/in.h> | ||
53 | #include <linux/net.h> | ||
54 | #include <linux/mm.h> | ||
55 | #include <linux/udp.h> | ||
56 | #include <linux/tcp.h> | ||
57 | #include <linux/sunrpc/clnt.h> | ||
58 | #include <linux/file.h> | ||
59 | #include <linux/workqueue.h> | 43 | #include <linux/workqueue.h> |
60 | #include <linux/random.h> | 44 | #include <linux/random.h> |
61 | 45 | ||
62 | #include <net/sock.h> | 46 | #include <linux/sunrpc/clnt.h> |
63 | #include <net/checksum.h> | ||
64 | #include <net/udp.h> | ||
65 | #include <net/tcp.h> | ||
66 | 47 | ||
67 | /* | 48 | /* |
68 | * Local variables | 49 | * Local variables |
@@ -73,81 +54,90 @@ | |||
73 | # define RPCDBG_FACILITY RPCDBG_XPRT | 54 | # define RPCDBG_FACILITY RPCDBG_XPRT |
74 | #endif | 55 | #endif |
75 | 56 | ||
76 | #define XPRT_MAX_BACKOFF (8) | ||
77 | #define XPRT_IDLE_TIMEOUT (5*60*HZ) | ||
78 | #define XPRT_MAX_RESVPORT (800) | ||
79 | |||
80 | /* | 57 | /* |
81 | * Local functions | 58 | * Local functions |
82 | */ | 59 | */ |
83 | static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); | 60 | static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); |
84 | static inline void do_xprt_reserve(struct rpc_task *); | 61 | static inline void do_xprt_reserve(struct rpc_task *); |
85 | static void xprt_disconnect(struct rpc_xprt *); | ||
86 | static void xprt_connect_status(struct rpc_task *task); | 62 | static void xprt_connect_status(struct rpc_task *task); |
87 | static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, | ||
88 | struct rpc_timeout *to); | ||
89 | static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); | ||
90 | static void xprt_bind_socket(struct rpc_xprt *, struct socket *); | ||
91 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); | 63 | static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); |
92 | 64 | ||
93 | static int xprt_clear_backlog(struct rpc_xprt *xprt); | ||
94 | |||
95 | #ifdef RPC_DEBUG_DATA | ||
96 | /* | 65 | /* |
97 | * Print the buffer contents (first 128 bytes only--just enough for | 66 | * The transport code maintains an estimate on the maximum number of out- |
98 | * diropres return). | 67 | * standing RPC requests, using a smoothed version of the congestion |
68 | * avoidance implemented in 44BSD. This is basically the Van Jacobson | ||
69 | * congestion algorithm: If a retransmit occurs, the congestion window is | ||
70 | * halved; otherwise, it is incremented by 1/cwnd when | ||
71 | * | ||
72 | * - a reply is received and | ||
73 | * - a full number of requests are outstanding and | ||
74 | * - the congestion window hasn't been updated recently. | ||
99 | */ | 75 | */ |
100 | static void | 76 | #define RPC_CWNDSHIFT (8U) |
101 | xprt_pktdump(char *msg, u32 *packet, unsigned int count) | 77 | #define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT) |
102 | { | 78 | #define RPC_INITCWND RPC_CWNDSCALE |
103 | u8 *buf = (u8 *) packet; | 79 | #define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT) |
104 | int j; | ||
105 | |||
106 | dprintk("RPC: %s\n", msg); | ||
107 | for (j = 0; j < count && j < 128; j += 4) { | ||
108 | if (!(j & 31)) { | ||
109 | if (j) | ||
110 | dprintk("\n"); | ||
111 | dprintk("0x%04x ", j); | ||
112 | } | ||
113 | dprintk("%02x%02x%02x%02x ", | ||
114 | buf[j], buf[j+1], buf[j+2], buf[j+3]); | ||
115 | } | ||
116 | dprintk("\n"); | ||
117 | } | ||
118 | #else | ||
119 | static inline void | ||
120 | xprt_pktdump(char *msg, u32 *packet, unsigned int count) | ||
121 | { | ||
122 | /* NOP */ | ||
123 | } | ||
124 | #endif | ||
125 | 80 | ||
126 | /* | 81 | #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) |
127 | * Look up RPC transport given an INET socket | 82 | |
83 | /** | ||
84 | * xprt_reserve_xprt - serialize write access to transports | ||
85 | * @task: task that is requesting access to the transport | ||
86 | * | ||
87 | * This prevents mixing the payload of separate requests, and prevents | ||
88 | * transport connects from colliding with writes. No congestion control | ||
89 | * is provided. | ||
128 | */ | 90 | */ |
129 | static inline struct rpc_xprt * | 91 | int xprt_reserve_xprt(struct rpc_task *task) |
130 | xprt_from_sock(struct sock *sk) | ||
131 | { | 92 | { |
132 | return (struct rpc_xprt *) sk->sk_user_data; | 93 | struct rpc_xprt *xprt = task->tk_xprt; |
94 | struct rpc_rqst *req = task->tk_rqstp; | ||
95 | |||
96 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { | ||
97 | if (task == xprt->snd_task) | ||
98 | return 1; | ||
99 | if (task == NULL) | ||
100 | return 0; | ||
101 | goto out_sleep; | ||
102 | } | ||
103 | xprt->snd_task = task; | ||
104 | if (req) { | ||
105 | req->rq_bytes_sent = 0; | ||
106 | req->rq_ntrans++; | ||
107 | } | ||
108 | return 1; | ||
109 | |||
110 | out_sleep: | ||
111 | dprintk("RPC: %4d failed to lock transport %p\n", | ||
112 | task->tk_pid, xprt); | ||
113 | task->tk_timeout = 0; | ||
114 | task->tk_status = -EAGAIN; | ||
115 | if (req && req->rq_ntrans) | ||
116 | rpc_sleep_on(&xprt->resend, task, NULL, NULL); | ||
117 | else | ||
118 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | ||
119 | return 0; | ||
133 | } | 120 | } |
134 | 121 | ||
135 | /* | 122 | /* |
136 | * Serialize write access to sockets, in order to prevent different | 123 | * xprt_reserve_xprt_cong - serialize write access to transports |
137 | * requests from interfering with each other. | 124 | * @task: task that is requesting access to the transport |
138 | * Also prevents TCP socket connects from colliding with writes. | 125 | * |
126 | * Same as xprt_reserve_xprt, but Van Jacobson congestion control is | ||
127 | * integrated into the decision of whether a request is allowed to be | ||
128 | * woken up and given access to the transport. | ||
139 | */ | 129 | */ |
140 | static int | 130 | int xprt_reserve_xprt_cong(struct rpc_task *task) |
141 | __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
142 | { | 131 | { |
132 | struct rpc_xprt *xprt = task->tk_xprt; | ||
143 | struct rpc_rqst *req = task->tk_rqstp; | 133 | struct rpc_rqst *req = task->tk_rqstp; |
144 | 134 | ||
145 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { | 135 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { |
146 | if (task == xprt->snd_task) | 136 | if (task == xprt->snd_task) |
147 | return 1; | 137 | return 1; |
148 | goto out_sleep; | 138 | goto out_sleep; |
149 | } | 139 | } |
150 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | 140 | if (__xprt_get_cong(xprt, task)) { |
151 | xprt->snd_task = task; | 141 | xprt->snd_task = task; |
152 | if (req) { | 142 | if (req) { |
153 | req->rq_bytes_sent = 0; | 143 | req->rq_bytes_sent = 0; |
@@ -156,10 +146,10 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | |||
156 | return 1; | 146 | return 1; |
157 | } | 147 | } |
158 | smp_mb__before_clear_bit(); | 148 | smp_mb__before_clear_bit(); |
159 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 149 | clear_bit(XPRT_LOCKED, &xprt->state); |
160 | smp_mb__after_clear_bit(); | 150 | smp_mb__after_clear_bit(); |
161 | out_sleep: | 151 | out_sleep: |
162 | dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); | 152 | dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt); |
163 | task->tk_timeout = 0; | 153 | task->tk_timeout = 0; |
164 | task->tk_status = -EAGAIN; | 154 | task->tk_status = -EAGAIN; |
165 | if (req && req->rq_ntrans) | 155 | if (req && req->rq_ntrans) |
@@ -169,26 +159,52 @@ out_sleep: | |||
169 | return 0; | 159 | return 0; |
170 | } | 160 | } |
171 | 161 | ||
172 | static inline int | 162 | static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) |
173 | xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
174 | { | 163 | { |
175 | int retval; | 164 | int retval; |
176 | 165 | ||
177 | spin_lock_bh(&xprt->sock_lock); | 166 | spin_lock_bh(&xprt->transport_lock); |
178 | retval = __xprt_lock_write(xprt, task); | 167 | retval = xprt->ops->reserve_xprt(task); |
179 | spin_unlock_bh(&xprt->sock_lock); | 168 | spin_unlock_bh(&xprt->transport_lock); |
180 | return retval; | 169 | return retval; |
181 | } | 170 | } |
182 | 171 | ||
172 | static void __xprt_lock_write_next(struct rpc_xprt *xprt) | ||
173 | { | ||
174 | struct rpc_task *task; | ||
175 | struct rpc_rqst *req; | ||
183 | 176 | ||
184 | static void | 177 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
185 | __xprt_lock_write_next(struct rpc_xprt *xprt) | 178 | return; |
179 | |||
180 | task = rpc_wake_up_next(&xprt->resend); | ||
181 | if (!task) { | ||
182 | task = rpc_wake_up_next(&xprt->sending); | ||
183 | if (!task) | ||
184 | goto out_unlock; | ||
185 | } | ||
186 | |||
187 | req = task->tk_rqstp; | ||
188 | xprt->snd_task = task; | ||
189 | if (req) { | ||
190 | req->rq_bytes_sent = 0; | ||
191 | req->rq_ntrans++; | ||
192 | } | ||
193 | return; | ||
194 | |||
195 | out_unlock: | ||
196 | smp_mb__before_clear_bit(); | ||
197 | clear_bit(XPRT_LOCKED, &xprt->state); | ||
198 | smp_mb__after_clear_bit(); | ||
199 | } | ||
200 | |||
201 | static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) | ||
186 | { | 202 | { |
187 | struct rpc_task *task; | 203 | struct rpc_task *task; |
188 | 204 | ||
189 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) | 205 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
190 | return; | 206 | return; |
191 | if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) | 207 | if (RPCXPRT_CONGESTED(xprt)) |
192 | goto out_unlock; | 208 | goto out_unlock; |
193 | task = rpc_wake_up_next(&xprt->resend); | 209 | task = rpc_wake_up_next(&xprt->resend); |
194 | if (!task) { | 210 | if (!task) { |
@@ -196,7 +212,7 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) | |||
196 | if (!task) | 212 | if (!task) |
197 | goto out_unlock; | 213 | goto out_unlock; |
198 | } | 214 | } |
199 | if (xprt->nocong || __xprt_get_cong(xprt, task)) { | 215 | if (__xprt_get_cong(xprt, task)) { |
200 | struct rpc_rqst *req = task->tk_rqstp; | 216 | struct rpc_rqst *req = task->tk_rqstp; |
201 | xprt->snd_task = task; | 217 | xprt->snd_task = task; |
202 | if (req) { | 218 | if (req) { |
@@ -207,87 +223,52 @@ __xprt_lock_write_next(struct rpc_xprt *xprt) | |||
207 | } | 223 | } |
208 | out_unlock: | 224 | out_unlock: |
209 | smp_mb__before_clear_bit(); | 225 | smp_mb__before_clear_bit(); |
210 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 226 | clear_bit(XPRT_LOCKED, &xprt->state); |
211 | smp_mb__after_clear_bit(); | 227 | smp_mb__after_clear_bit(); |
212 | } | 228 | } |
213 | 229 | ||
214 | /* | 230 | /** |
215 | * Releases the socket for use by other requests. | 231 | * xprt_release_xprt - allow other requests to use a transport |
232 | * @xprt: transport with other tasks potentially waiting | ||
233 | * @task: task that is releasing access to the transport | ||
234 | * | ||
235 | * Note that "task" can be NULL. No congestion control is provided. | ||
216 | */ | 236 | */ |
217 | static void | 237 | void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) |
218 | __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | ||
219 | { | 238 | { |
220 | if (xprt->snd_task == task) { | 239 | if (xprt->snd_task == task) { |
221 | xprt->snd_task = NULL; | 240 | xprt->snd_task = NULL; |
222 | smp_mb__before_clear_bit(); | 241 | smp_mb__before_clear_bit(); |
223 | clear_bit(XPRT_LOCKED, &xprt->sockstate); | 242 | clear_bit(XPRT_LOCKED, &xprt->state); |
224 | smp_mb__after_clear_bit(); | 243 | smp_mb__after_clear_bit(); |
225 | __xprt_lock_write_next(xprt); | 244 | __xprt_lock_write_next(xprt); |
226 | } | 245 | } |
227 | } | 246 | } |
228 | 247 | ||
229 | static inline void | 248 | /** |
230 | xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) | 249 | * xprt_release_xprt_cong - allow other requests to use a transport |
231 | { | 250 | * @xprt: transport with other tasks potentially waiting |
232 | spin_lock_bh(&xprt->sock_lock); | 251 | * @task: task that is releasing access to the transport |
233 | __xprt_release_write(xprt, task); | 252 | * |
234 | spin_unlock_bh(&xprt->sock_lock); | 253 | * Note that "task" can be NULL. Another task is awoken to use the |
235 | } | 254 | * transport if the transport's congestion window allows it. |
236 | |||
237 | /* | ||
238 | * Write data to socket. | ||
239 | */ | 255 | */ |
240 | static inline int | 256 | void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) |
241 | xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req) | ||
242 | { | 257 | { |
243 | struct socket *sock = xprt->sock; | 258 | if (xprt->snd_task == task) { |
244 | struct xdr_buf *xdr = &req->rq_snd_buf; | 259 | xprt->snd_task = NULL; |
245 | struct sockaddr *addr = NULL; | 260 | smp_mb__before_clear_bit(); |
246 | int addrlen = 0; | 261 | clear_bit(XPRT_LOCKED, &xprt->state); |
247 | unsigned int skip; | 262 | smp_mb__after_clear_bit(); |
248 | int result; | 263 | __xprt_lock_write_next_cong(xprt); |
249 | |||
250 | if (!sock) | ||
251 | return -ENOTCONN; | ||
252 | |||
253 | xprt_pktdump("packet data:", | ||
254 | req->rq_svec->iov_base, | ||
255 | req->rq_svec->iov_len); | ||
256 | |||
257 | /* For UDP, we need to provide an address */ | ||
258 | if (!xprt->stream) { | ||
259 | addr = (struct sockaddr *) &xprt->addr; | ||
260 | addrlen = sizeof(xprt->addr); | ||
261 | } | 264 | } |
262 | /* Dont repeat bytes */ | 265 | } |
263 | skip = req->rq_bytes_sent; | ||
264 | |||
265 | clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
266 | result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT); | ||
267 | |||
268 | dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result); | ||
269 | |||
270 | if (result >= 0) | ||
271 | return result; | ||
272 | 266 | ||
273 | switch (result) { | 267 | static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) |
274 | case -ECONNREFUSED: | 268 | { |
275 | /* When the server has died, an ICMP port unreachable message | 269 | spin_lock_bh(&xprt->transport_lock); |
276 | * prompts ECONNREFUSED. | 270 | xprt->ops->release_xprt(xprt, task); |
277 | */ | 271 | spin_unlock_bh(&xprt->transport_lock); |
278 | case -EAGAIN: | ||
279 | break; | ||
280 | case -ECONNRESET: | ||
281 | case -ENOTCONN: | ||
282 | case -EPIPE: | ||
283 | /* connection broken */ | ||
284 | if (xprt->stream) | ||
285 | result = -ENOTCONN; | ||
286 | break; | ||
287 | default: | ||
288 | printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result); | ||
289 | } | ||
290 | return result; | ||
291 | } | 272 | } |
292 | 273 | ||
293 | /* | 274 | /* |
@@ -321,26 +302,40 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req) | |||
321 | return; | 302 | return; |
322 | req->rq_cong = 0; | 303 | req->rq_cong = 0; |
323 | xprt->cong -= RPC_CWNDSCALE; | 304 | xprt->cong -= RPC_CWNDSCALE; |
324 | __xprt_lock_write_next(xprt); | 305 | __xprt_lock_write_next_cong(xprt); |
325 | } | 306 | } |
326 | 307 | ||
327 | /* | 308 | /** |
328 | * Adjust RPC congestion window | 309 | * xprt_release_rqst_cong - housekeeping when request is complete |
310 | * @task: RPC request that recently completed | ||
311 | * | ||
312 | * Useful for transports that require congestion control. | ||
313 | */ | ||
314 | void xprt_release_rqst_cong(struct rpc_task *task) | ||
315 | { | ||
316 | __xprt_put_cong(task->tk_xprt, task->tk_rqstp); | ||
317 | } | ||
318 | |||
319 | /** | ||
320 | * xprt_adjust_cwnd - adjust transport congestion window | ||
321 | * @task: recently completed RPC request used to adjust window | ||
322 | * @result: result code of completed RPC request | ||
323 | * | ||
329 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. | 324 | * We use a time-smoothed congestion estimator to avoid heavy oscillation. |
330 | */ | 325 | */ |
331 | static void | 326 | void xprt_adjust_cwnd(struct rpc_task *task, int result) |
332 | xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) | ||
333 | { | 327 | { |
334 | unsigned long cwnd; | 328 | struct rpc_rqst *req = task->tk_rqstp; |
329 | struct rpc_xprt *xprt = task->tk_xprt; | ||
330 | unsigned long cwnd = xprt->cwnd; | ||
335 | 331 | ||
336 | cwnd = xprt->cwnd; | ||
337 | if (result >= 0 && cwnd <= xprt->cong) { | 332 | if (result >= 0 && cwnd <= xprt->cong) { |
338 | /* The (cwnd >> 1) term makes sure | 333 | /* The (cwnd >> 1) term makes sure |
339 | * the result gets rounded properly. */ | 334 | * the result gets rounded properly. */ |
340 | cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; | 335 | cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd; |
341 | if (cwnd > RPC_MAXCWND(xprt)) | 336 | if (cwnd > RPC_MAXCWND(xprt)) |
342 | cwnd = RPC_MAXCWND(xprt); | 337 | cwnd = RPC_MAXCWND(xprt); |
343 | __xprt_lock_write_next(xprt); | 338 | __xprt_lock_write_next_cong(xprt); |
344 | } else if (result == -ETIMEDOUT) { | 339 | } else if (result == -ETIMEDOUT) { |
345 | cwnd >>= 1; | 340 | cwnd >>= 1; |
346 | if (cwnd < RPC_CWNDSCALE) | 341 | if (cwnd < RPC_CWNDSCALE) |
@@ -349,11 +344,89 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result) | |||
349 | dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", | 344 | dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n", |
350 | xprt->cong, xprt->cwnd, cwnd); | 345 | xprt->cong, xprt->cwnd, cwnd); |
351 | xprt->cwnd = cwnd; | 346 | xprt->cwnd = cwnd; |
347 | __xprt_put_cong(xprt, req); | ||
348 | } | ||
349 | |||
350 | /** | ||
351 | * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue | ||
352 | * @xprt: transport with waiting tasks | ||
353 | * @status: result code to plant in each task before waking it | ||
354 | * | ||
355 | */ | ||
356 | void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) | ||
357 | { | ||
358 | if (status < 0) | ||
359 | rpc_wake_up_status(&xprt->pending, status); | ||
360 | else | ||
361 | rpc_wake_up(&xprt->pending); | ||
362 | } | ||
363 | |||
364 | /** | ||
365 | * xprt_wait_for_buffer_space - wait for transport output buffer to clear | ||
366 | * @task: task to be put to sleep | ||
367 | * | ||
368 | */ | ||
369 | void xprt_wait_for_buffer_space(struct rpc_task *task) | ||
370 | { | ||
371 | struct rpc_rqst *req = task->tk_rqstp; | ||
372 | struct rpc_xprt *xprt = req->rq_xprt; | ||
373 | |||
374 | task->tk_timeout = req->rq_timeout; | ||
375 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); | ||
376 | } | ||
377 | |||
378 | /** | ||
379 | * xprt_write_space - wake the task waiting for transport output buffer space | ||
380 | * @xprt: transport with waiting tasks | ||
381 | * | ||
382 | * Can be called in a soft IRQ context, so xprt_write_space never sleeps. | ||
383 | */ | ||
384 | void xprt_write_space(struct rpc_xprt *xprt) | ||
385 | { | ||
386 | if (unlikely(xprt->shutdown)) | ||
387 | return; | ||
388 | |||
389 | spin_lock_bh(&xprt->transport_lock); | ||
390 | if (xprt->snd_task) { | ||
391 | dprintk("RPC: write space: waking waiting task on xprt %p\n", | ||
392 | xprt); | ||
393 | rpc_wake_up_task(xprt->snd_task); | ||
394 | } | ||
395 | spin_unlock_bh(&xprt->transport_lock); | ||
396 | } | ||
397 | |||
398 | /** | ||
399 | * xprt_set_retrans_timeout_def - set a request's retransmit timeout | ||
400 | * @task: task whose timeout is to be set | ||
401 | * | ||
402 | * Set a request's retransmit timeout based on the transport's | ||
403 | * default timeout parameters. Used by transports that don't adjust | ||
404 | * the retransmit timeout based on round-trip time estimation. | ||
405 | */ | ||
406 | void xprt_set_retrans_timeout_def(struct rpc_task *task) | ||
407 | { | ||
408 | task->tk_timeout = task->tk_rqstp->rq_timeout; | ||
352 | } | 409 | } |
353 | 410 | ||
354 | /* | 411 | /* |
355 | * Reset the major timeout value | 412 | * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout |
413 | * @task: task whose timeout is to be set | ||
414 | * | ||
415 | * Set a request's retransmit timeout using the RTT estimator. | ||
356 | */ | 416 | */ |
417 | void xprt_set_retrans_timeout_rtt(struct rpc_task *task) | ||
418 | { | ||
419 | int timer = task->tk_msg.rpc_proc->p_timer; | ||
420 | struct rpc_rtt *rtt = task->tk_client->cl_rtt; | ||
421 | struct rpc_rqst *req = task->tk_rqstp; | ||
422 | unsigned long max_timeout = req->rq_xprt->timeout.to_maxval; | ||
423 | |||
424 | task->tk_timeout = rpc_calc_rto(rtt, timer); | ||
425 | task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries; | ||
426 | if (task->tk_timeout > max_timeout || task->tk_timeout == 0) | ||
427 | task->tk_timeout = max_timeout; | ||
428 | } | ||
429 | |||
357 | static void xprt_reset_majortimeo(struct rpc_rqst *req) | 430 | static void xprt_reset_majortimeo(struct rpc_rqst *req) |
358 | { | 431 | { |
359 | struct rpc_timeout *to = &req->rq_xprt->timeout; | 432 | struct rpc_timeout *to = &req->rq_xprt->timeout; |
@@ -368,8 +441,10 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req) | |||
368 | req->rq_majortimeo += jiffies; | 441 | req->rq_majortimeo += jiffies; |
369 | } | 442 | } |
370 | 443 | ||
371 | /* | 444 | /** |
372 | * Adjust timeout values etc for next retransmit | 445 | * xprt_adjust_timeout - adjust timeout values for next retransmit |
446 | * @req: RPC request containing parameters to use for the adjustment | ||
447 | * | ||
373 | */ | 448 | */ |
374 | int xprt_adjust_timeout(struct rpc_rqst *req) | 449 | int xprt_adjust_timeout(struct rpc_rqst *req) |
375 | { | 450 | { |
@@ -391,9 +466,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req) | |||
391 | req->rq_retries = 0; | 466 | req->rq_retries = 0; |
392 | xprt_reset_majortimeo(req); | 467 | xprt_reset_majortimeo(req); |
393 | /* Reset the RTT counters == "slow start" */ | 468 | /* Reset the RTT counters == "slow start" */ |
394 | spin_lock_bh(&xprt->sock_lock); | 469 | spin_lock_bh(&xprt->transport_lock); |
395 | rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); | 470 | rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval); |
396 | spin_unlock_bh(&xprt->sock_lock); | 471 | spin_unlock_bh(&xprt->transport_lock); |
397 | pprintk("RPC: %lu timeout\n", jiffies); | 472 | pprintk("RPC: %lu timeout\n", jiffies); |
398 | status = -ETIMEDOUT; | 473 | status = -ETIMEDOUT; |
399 | } | 474 | } |
@@ -405,133 +480,52 @@ int xprt_adjust_timeout(struct rpc_rqst *req) | |||
405 | return status; | 480 | return status; |
406 | } | 481 | } |
407 | 482 | ||
408 | /* | 483 | static void xprt_autoclose(void *args) |
409 | * Close down a transport socket | ||
410 | */ | ||
411 | static void | ||
412 | xprt_close(struct rpc_xprt *xprt) | ||
413 | { | ||
414 | struct socket *sock = xprt->sock; | ||
415 | struct sock *sk = xprt->inet; | ||
416 | |||
417 | if (!sk) | ||
418 | return; | ||
419 | |||
420 | write_lock_bh(&sk->sk_callback_lock); | ||
421 | xprt->inet = NULL; | ||
422 | xprt->sock = NULL; | ||
423 | |||
424 | sk->sk_user_data = NULL; | ||
425 | sk->sk_data_ready = xprt->old_data_ready; | ||
426 | sk->sk_state_change = xprt->old_state_change; | ||
427 | sk->sk_write_space = xprt->old_write_space; | ||
428 | write_unlock_bh(&sk->sk_callback_lock); | ||
429 | |||
430 | sk->sk_no_check = 0; | ||
431 | |||
432 | sock_release(sock); | ||
433 | } | ||
434 | |||
435 | static void | ||
436 | xprt_socket_autoclose(void *args) | ||
437 | { | 484 | { |
438 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | 485 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; |
439 | 486 | ||
440 | xprt_disconnect(xprt); | 487 | xprt_disconnect(xprt); |
441 | xprt_close(xprt); | 488 | xprt->ops->close(xprt); |
442 | xprt_release_write(xprt, NULL); | 489 | xprt_release_write(xprt, NULL); |
443 | } | 490 | } |
444 | 491 | ||
445 | /* | 492 | /** |
446 | * Mark a transport as disconnected | 493 | * xprt_disconnect - mark a transport as disconnected |
494 | * @xprt: transport to flag for disconnect | ||
495 | * | ||
447 | */ | 496 | */ |
448 | static void | 497 | void xprt_disconnect(struct rpc_xprt *xprt) |
449 | xprt_disconnect(struct rpc_xprt *xprt) | ||
450 | { | 498 | { |
451 | dprintk("RPC: disconnected transport %p\n", xprt); | 499 | dprintk("RPC: disconnected transport %p\n", xprt); |
452 | spin_lock_bh(&xprt->sock_lock); | 500 | spin_lock_bh(&xprt->transport_lock); |
453 | xprt_clear_connected(xprt); | 501 | xprt_clear_connected(xprt); |
454 | rpc_wake_up_status(&xprt->pending, -ENOTCONN); | 502 | xprt_wake_pending_tasks(xprt, -ENOTCONN); |
455 | spin_unlock_bh(&xprt->sock_lock); | 503 | spin_unlock_bh(&xprt->transport_lock); |
456 | } | 504 | } |
457 | 505 | ||
458 | /* | ||
459 | * Used to allow disconnection when we've been idle | ||
460 | */ | ||
461 | static void | 506 | static void |
462 | xprt_init_autodisconnect(unsigned long data) | 507 | xprt_init_autodisconnect(unsigned long data) |
463 | { | 508 | { |
464 | struct rpc_xprt *xprt = (struct rpc_xprt *)data; | 509 | struct rpc_xprt *xprt = (struct rpc_xprt *)data; |
465 | 510 | ||
466 | spin_lock(&xprt->sock_lock); | 511 | spin_lock(&xprt->transport_lock); |
467 | if (!list_empty(&xprt->recv) || xprt->shutdown) | 512 | if (!list_empty(&xprt->recv) || xprt->shutdown) |
468 | goto out_abort; | 513 | goto out_abort; |
469 | if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) | 514 | if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) |
470 | goto out_abort; | 515 | goto out_abort; |
471 | spin_unlock(&xprt->sock_lock); | 516 | spin_unlock(&xprt->transport_lock); |
472 | /* Let keventd close the socket */ | 517 | if (xprt_connecting(xprt)) |
473 | if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0) | ||
474 | xprt_release_write(xprt, NULL); | 518 | xprt_release_write(xprt, NULL); |
475 | else | 519 | else |
476 | schedule_work(&xprt->task_cleanup); | 520 | schedule_work(&xprt->task_cleanup); |
477 | return; | 521 | return; |
478 | out_abort: | 522 | out_abort: |
479 | spin_unlock(&xprt->sock_lock); | 523 | spin_unlock(&xprt->transport_lock); |
480 | } | ||
481 | |||
482 | static void xprt_socket_connect(void *args) | ||
483 | { | ||
484 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
485 | struct socket *sock = xprt->sock; | ||
486 | int status = -EIO; | ||
487 | |||
488 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
489 | goto out; | ||
490 | |||
491 | /* | ||
492 | * Start by resetting any existing state | ||
493 | */ | ||
494 | xprt_close(xprt); | ||
495 | sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); | ||
496 | if (sock == NULL) { | ||
497 | /* couldn't create socket or bind to reserved port; | ||
498 | * this is likely a permanent error, so cause an abort */ | ||
499 | goto out; | ||
500 | } | ||
501 | xprt_bind_socket(xprt, sock); | ||
502 | xprt_sock_setbufsize(xprt); | ||
503 | |||
504 | status = 0; | ||
505 | if (!xprt->stream) | ||
506 | goto out; | ||
507 | |||
508 | /* | ||
509 | * Tell the socket layer to start connecting... | ||
510 | */ | ||
511 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | ||
512 | sizeof(xprt->addr), O_NONBLOCK); | ||
513 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | ||
514 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | ||
515 | if (status < 0) { | ||
516 | switch (status) { | ||
517 | case -EINPROGRESS: | ||
518 | case -EALREADY: | ||
519 | goto out_clear; | ||
520 | } | ||
521 | } | ||
522 | out: | ||
523 | if (status < 0) | ||
524 | rpc_wake_up_status(&xprt->pending, status); | ||
525 | else | ||
526 | rpc_wake_up(&xprt->pending); | ||
527 | out_clear: | ||
528 | smp_mb__before_clear_bit(); | ||
529 | clear_bit(XPRT_CONNECTING, &xprt->sockstate); | ||
530 | smp_mb__after_clear_bit(); | ||
531 | } | 524 | } |
532 | 525 | ||
533 | /* | 526 | /** |
534 | * Attempt to connect a TCP socket. | 527 | * xprt_connect - schedule a transport connect operation |
528 | * @task: RPC task that is requesting the connect | ||
535 | * | 529 | * |
536 | */ | 530 | */ |
537 | void xprt_connect(struct rpc_task *task) | 531 | void xprt_connect(struct rpc_task *task) |
@@ -552,37 +546,19 @@ void xprt_connect(struct rpc_task *task) | |||
552 | if (!xprt_lock_write(xprt, task)) | 546 | if (!xprt_lock_write(xprt, task)) |
553 | return; | 547 | return; |
554 | if (xprt_connected(xprt)) | 548 | if (xprt_connected(xprt)) |
555 | goto out_write; | 549 | xprt_release_write(xprt, task); |
550 | else { | ||
551 | if (task->tk_rqstp) | ||
552 | task->tk_rqstp->rq_bytes_sent = 0; | ||
556 | 553 | ||
557 | if (task->tk_rqstp) | 554 | task->tk_timeout = xprt->connect_timeout; |
558 | task->tk_rqstp->rq_bytes_sent = 0; | 555 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); |
559 | 556 | xprt->ops->connect(task); | |
560 | task->tk_timeout = RPC_CONNECT_TIMEOUT; | ||
561 | rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); | ||
562 | if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) { | ||
563 | /* Note: if we are here due to a dropped connection | ||
564 | * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ | ||
565 | * seconds | ||
566 | */ | ||
567 | if (xprt->sock != NULL) | ||
568 | schedule_delayed_work(&xprt->sock_connect, | ||
569 | RPC_REESTABLISH_TIMEOUT); | ||
570 | else { | ||
571 | schedule_work(&xprt->sock_connect); | ||
572 | if (!RPC_IS_ASYNC(task)) | ||
573 | flush_scheduled_work(); | ||
574 | } | ||
575 | } | 557 | } |
576 | return; | 558 | return; |
577 | out_write: | ||
578 | xprt_release_write(xprt, task); | ||
579 | } | 559 | } |
580 | 560 | ||
581 | /* | 561 | static void xprt_connect_status(struct rpc_task *task) |
582 | * We arrive here when awoken from waiting on connection establishment. | ||
583 | */ | ||
584 | static void | ||
585 | xprt_connect_status(struct rpc_task *task) | ||
586 | { | 562 | { |
587 | struct rpc_xprt *xprt = task->tk_xprt; | 563 | struct rpc_xprt *xprt = task->tk_xprt; |
588 | 564 | ||
@@ -592,31 +568,42 @@ xprt_connect_status(struct rpc_task *task) | |||
592 | return; | 568 | return; |
593 | } | 569 | } |
594 | 570 | ||
595 | /* if soft mounted, just cause this RPC to fail */ | ||
596 | if (RPC_IS_SOFT(task)) | ||
597 | task->tk_status = -EIO; | ||
598 | |||
599 | switch (task->tk_status) { | 571 | switch (task->tk_status) { |
600 | case -ECONNREFUSED: | 572 | case -ECONNREFUSED: |
601 | case -ECONNRESET: | 573 | case -ECONNRESET: |
574 | dprintk("RPC: %4d xprt_connect_status: server %s refused connection\n", | ||
575 | task->tk_pid, task->tk_client->cl_server); | ||
576 | break; | ||
602 | case -ENOTCONN: | 577 | case -ENOTCONN: |
603 | return; | 578 | dprintk("RPC: %4d xprt_connect_status: connection broken\n", |
579 | task->tk_pid); | ||
580 | break; | ||
604 | case -ETIMEDOUT: | 581 | case -ETIMEDOUT: |
605 | dprintk("RPC: %4d xprt_connect_status: timed out\n", | 582 | dprintk("RPC: %4d xprt_connect_status: connect attempt timed out\n", |
606 | task->tk_pid); | 583 | task->tk_pid); |
607 | break; | 584 | break; |
608 | default: | 585 | default: |
609 | printk(KERN_ERR "RPC: error %d connecting to server %s\n", | 586 | dprintk("RPC: %4d xprt_connect_status: error %d connecting to server %s\n", |
610 | -task->tk_status, task->tk_client->cl_server); | 587 | task->tk_pid, -task->tk_status, task->tk_client->cl_server); |
588 | xprt_release_write(xprt, task); | ||
589 | task->tk_status = -EIO; | ||
590 | return; | ||
591 | } | ||
592 | |||
593 | /* if soft mounted, just cause this RPC to fail */ | ||
594 | if (RPC_IS_SOFT(task)) { | ||
595 | xprt_release_write(xprt, task); | ||
596 | task->tk_status = -EIO; | ||
611 | } | 597 | } |
612 | xprt_release_write(xprt, task); | ||
613 | } | 598 | } |
614 | 599 | ||
615 | /* | 600 | /** |
616 | * Look up the RPC request corresponding to a reply, and then lock it. | 601 | * xprt_lookup_rqst - find an RPC request corresponding to an XID |
602 | * @xprt: transport on which the original request was transmitted | ||
603 | * @xid: RPC XID of incoming reply | ||
604 | * | ||
617 | */ | 605 | */ |
618 | static inline struct rpc_rqst * | 606 | struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) |
619 | xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) | ||
620 | { | 607 | { |
621 | struct list_head *pos; | 608 | struct list_head *pos; |
622 | struct rpc_rqst *req = NULL; | 609 | struct rpc_rqst *req = NULL; |
@@ -631,556 +618,68 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid) | |||
631 | return req; | 618 | return req; |
632 | } | 619 | } |
633 | 620 | ||
634 | /* | 621 | /** |
635 | * Complete reply received. | 622 | * xprt_update_rtt - update an RPC client's RTT state after receiving a reply |
636 | * The TCP code relies on us to remove the request from xprt->pending. | 623 | * @task: RPC request that recently completed |
637 | */ | 624 | * |
638 | static void | ||
639 | xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied) | ||
640 | { | ||
641 | struct rpc_task *task = req->rq_task; | ||
642 | struct rpc_clnt *clnt = task->tk_client; | ||
643 | |||
644 | /* Adjust congestion window */ | ||
645 | if (!xprt->nocong) { | ||
646 | unsigned timer = task->tk_msg.rpc_proc->p_timer; | ||
647 | xprt_adjust_cwnd(xprt, copied); | ||
648 | __xprt_put_cong(xprt, req); | ||
649 | if (timer) { | ||
650 | if (req->rq_ntrans == 1) | ||
651 | rpc_update_rtt(clnt->cl_rtt, timer, | ||
652 | (long)jiffies - req->rq_xtime); | ||
653 | rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); | ||
654 | } | ||
655 | } | ||
656 | |||
657 | #ifdef RPC_PROFILE | ||
658 | /* Profile only reads for now */ | ||
659 | if (copied > 1024) { | ||
660 | static unsigned long nextstat; | ||
661 | static unsigned long pkt_rtt, pkt_len, pkt_cnt; | ||
662 | |||
663 | pkt_cnt++; | ||
664 | pkt_len += req->rq_slen + copied; | ||
665 | pkt_rtt += jiffies - req->rq_xtime; | ||
666 | if (time_before(nextstat, jiffies)) { | ||
667 | printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd); | ||
668 | printk("RPC: %ld %ld %ld %ld stat\n", | ||
669 | jiffies, pkt_cnt, pkt_len, pkt_rtt); | ||
670 | pkt_rtt = pkt_len = pkt_cnt = 0; | ||
671 | nextstat = jiffies + 5 * HZ; | ||
672 | } | ||
673 | } | ||
674 | #endif | ||
675 | |||
676 | dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied); | ||
677 | list_del_init(&req->rq_list); | ||
678 | req->rq_received = req->rq_private_buf.len = copied; | ||
679 | |||
680 | /* ... and wake up the process. */ | ||
681 | rpc_wake_up_task(task); | ||
682 | return; | ||
683 | } | ||
684 | |||
685 | static size_t | ||
686 | skb_read_bits(skb_reader_t *desc, void *to, size_t len) | ||
687 | { | ||
688 | if (len > desc->count) | ||
689 | len = desc->count; | ||
690 | if (skb_copy_bits(desc->skb, desc->offset, to, len)) | ||
691 | return 0; | ||
692 | desc->count -= len; | ||
693 | desc->offset += len; | ||
694 | return len; | ||
695 | } | ||
696 | |||
697 | static size_t | ||
698 | skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) | ||
699 | { | ||
700 | unsigned int csum2, pos; | ||
701 | |||
702 | if (len > desc->count) | ||
703 | len = desc->count; | ||
704 | pos = desc->offset; | ||
705 | csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0); | ||
706 | desc->csum = csum_block_add(desc->csum, csum2, pos); | ||
707 | desc->count -= len; | ||
708 | desc->offset += len; | ||
709 | return len; | ||
710 | } | ||
711 | |||
712 | /* | ||
713 | * We have set things up such that we perform the checksum of the UDP | ||
714 | * packet in parallel with the copies into the RPC client iovec. -DaveM | ||
715 | */ | ||
716 | int | ||
717 | csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) | ||
718 | { | ||
719 | skb_reader_t desc; | ||
720 | |||
721 | desc.skb = skb; | ||
722 | desc.offset = sizeof(struct udphdr); | ||
723 | desc.count = skb->len - desc.offset; | ||
724 | |||
725 | if (skb->ip_summed == CHECKSUM_UNNECESSARY) | ||
726 | goto no_checksum; | ||
727 | |||
728 | desc.csum = csum_partial(skb->data, desc.offset, skb->csum); | ||
729 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) | ||
730 | return -1; | ||
731 | if (desc.offset != skb->len) { | ||
732 | unsigned int csum2; | ||
733 | csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); | ||
734 | desc.csum = csum_block_add(desc.csum, csum2, desc.offset); | ||
735 | } | ||
736 | if (desc.count) | ||
737 | return -1; | ||
738 | if ((unsigned short)csum_fold(desc.csum)) | ||
739 | return -1; | ||
740 | return 0; | ||
741 | no_checksum: | ||
742 | if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) | ||
743 | return -1; | ||
744 | if (desc.count) | ||
745 | return -1; | ||
746 | return 0; | ||
747 | } | ||
748 | |||
749 | /* | ||
750 | * Input handler for RPC replies. Called from a bottom half and hence | ||
751 | * atomic. | ||
752 | */ | ||
753 | static void | ||
754 | udp_data_ready(struct sock *sk, int len) | ||
755 | { | ||
756 | struct rpc_task *task; | ||
757 | struct rpc_xprt *xprt; | ||
758 | struct rpc_rqst *rovr; | ||
759 | struct sk_buff *skb; | ||
760 | int err, repsize, copied; | ||
761 | u32 _xid, *xp; | ||
762 | |||
763 | read_lock(&sk->sk_callback_lock); | ||
764 | dprintk("RPC: udp_data_ready...\n"); | ||
765 | if (!(xprt = xprt_from_sock(sk))) { | ||
766 | printk("RPC: udp_data_ready request not found!\n"); | ||
767 | goto out; | ||
768 | } | ||
769 | |||
770 | dprintk("RPC: udp_data_ready client %p\n", xprt); | ||
771 | |||
772 | if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) | ||
773 | goto out; | ||
774 | |||
775 | if (xprt->shutdown) | ||
776 | goto dropit; | ||
777 | |||
778 | repsize = skb->len - sizeof(struct udphdr); | ||
779 | if (repsize < 4) { | ||
780 | printk("RPC: impossible RPC reply size %d!\n", repsize); | ||
781 | goto dropit; | ||
782 | } | ||
783 | |||
784 | /* Copy the XID from the skb... */ | ||
785 | xp = skb_header_pointer(skb, sizeof(struct udphdr), | ||
786 | sizeof(_xid), &_xid); | ||
787 | if (xp == NULL) | ||
788 | goto dropit; | ||
789 | |||
790 | /* Look up and lock the request corresponding to the given XID */ | ||
791 | spin_lock(&xprt->sock_lock); | ||
792 | rovr = xprt_lookup_rqst(xprt, *xp); | ||
793 | if (!rovr) | ||
794 | goto out_unlock; | ||
795 | task = rovr->rq_task; | ||
796 | |||
797 | dprintk("RPC: %4d received reply\n", task->tk_pid); | ||
798 | |||
799 | if ((copied = rovr->rq_private_buf.buflen) > repsize) | ||
800 | copied = repsize; | ||
801 | |||
802 | /* Suck it into the iovec, verify checksum if not done by hw. */ | ||
803 | if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) | ||
804 | goto out_unlock; | ||
805 | |||
806 | /* Something worked... */ | ||
807 | dst_confirm(skb->dst); | ||
808 | |||
809 | xprt_complete_rqst(xprt, rovr, copied); | ||
810 | |||
811 | out_unlock: | ||
812 | spin_unlock(&xprt->sock_lock); | ||
813 | dropit: | ||
814 | skb_free_datagram(sk, skb); | ||
815 | out: | ||
816 | read_unlock(&sk->sk_callback_lock); | ||
817 | } | ||
818 | |||
819 | /* | ||
820 | * Copy from an skb into memory and shrink the skb. | ||
821 | */ | ||
822 | static inline size_t | ||
823 | tcp_copy_data(skb_reader_t *desc, void *p, size_t len) | ||
824 | { | ||
825 | if (len > desc->count) | ||
826 | len = desc->count; | ||
827 | if (skb_copy_bits(desc->skb, desc->offset, p, len)) { | ||
828 | dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", | ||
829 | len, desc->count); | ||
830 | return 0; | ||
831 | } | ||
832 | desc->offset += len; | ||
833 | desc->count -= len; | ||
834 | dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", | ||
835 | len, desc->count); | ||
836 | return len; | ||
837 | } | ||
838 | |||
839 | /* | ||
840 | * TCP read fragment marker | ||
841 | */ | ||
842 | static inline void | ||
843 | tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
844 | { | ||
845 | size_t len, used; | ||
846 | char *p; | ||
847 | |||
848 | p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; | ||
849 | len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; | ||
850 | used = tcp_copy_data(desc, p, len); | ||
851 | xprt->tcp_offset += used; | ||
852 | if (used != len) | ||
853 | return; | ||
854 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | ||
855 | if (xprt->tcp_reclen & 0x80000000) | ||
856 | xprt->tcp_flags |= XPRT_LAST_FRAG; | ||
857 | else | ||
858 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | ||
859 | xprt->tcp_reclen &= 0x7fffffff; | ||
860 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | ||
861 | xprt->tcp_offset = 0; | ||
862 | /* Sanity check of the record length */ | ||
863 | if (xprt->tcp_reclen < 4) { | ||
864 | printk(KERN_ERR "RPC: Invalid TCP record fragment length\n"); | ||
865 | xprt_disconnect(xprt); | ||
866 | } | ||
867 | dprintk("RPC: reading TCP record fragment of length %d\n", | ||
868 | xprt->tcp_reclen); | ||
869 | } | ||
870 | |||
871 | static void | ||
872 | tcp_check_recm(struct rpc_xprt *xprt) | ||
873 | { | ||
874 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", | ||
875 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); | ||
876 | if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
877 | xprt->tcp_flags |= XPRT_COPY_RECM; | ||
878 | xprt->tcp_offset = 0; | ||
879 | if (xprt->tcp_flags & XPRT_LAST_FRAG) { | ||
880 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
881 | xprt->tcp_flags |= XPRT_COPY_XID; | ||
882 | xprt->tcp_copied = 0; | ||
883 | } | ||
884 | } | ||
885 | } | ||
886 | |||
887 | /* | ||
888 | * TCP read xid | ||
889 | */ | ||
890 | static inline void | ||
891 | tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
892 | { | ||
893 | size_t len, used; | ||
894 | char *p; | ||
895 | |||
896 | len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; | ||
897 | dprintk("RPC: reading XID (%Zu bytes)\n", len); | ||
898 | p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; | ||
899 | used = tcp_copy_data(desc, p, len); | ||
900 | xprt->tcp_offset += used; | ||
901 | if (used != len) | ||
902 | return; | ||
903 | xprt->tcp_flags &= ~XPRT_COPY_XID; | ||
904 | xprt->tcp_flags |= XPRT_COPY_DATA; | ||
905 | xprt->tcp_copied = 4; | ||
906 | dprintk("RPC: reading reply for XID %08x\n", | ||
907 | ntohl(xprt->tcp_xid)); | ||
908 | tcp_check_recm(xprt); | ||
909 | } | ||
910 | |||
911 | /* | ||
912 | * TCP read and complete request | ||
913 | */ | ||
914 | static inline void | ||
915 | tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
916 | { | ||
917 | struct rpc_rqst *req; | ||
918 | struct xdr_buf *rcvbuf; | ||
919 | size_t len; | ||
920 | ssize_t r; | ||
921 | |||
922 | /* Find and lock the request corresponding to this xid */ | ||
923 | spin_lock(&xprt->sock_lock); | ||
924 | req = xprt_lookup_rqst(xprt, xprt->tcp_xid); | ||
925 | if (!req) { | ||
926 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
927 | dprintk("RPC: XID %08x request not found!\n", | ||
928 | ntohl(xprt->tcp_xid)); | ||
929 | spin_unlock(&xprt->sock_lock); | ||
930 | return; | ||
931 | } | ||
932 | |||
933 | rcvbuf = &req->rq_private_buf; | ||
934 | len = desc->count; | ||
935 | if (len > xprt->tcp_reclen - xprt->tcp_offset) { | ||
936 | skb_reader_t my_desc; | ||
937 | |||
938 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
939 | memcpy(&my_desc, desc, sizeof(my_desc)); | ||
940 | my_desc.count = len; | ||
941 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
942 | &my_desc, tcp_copy_data); | ||
943 | desc->count -= r; | ||
944 | desc->offset += r; | ||
945 | } else | ||
946 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
947 | desc, tcp_copy_data); | ||
948 | |||
949 | if (r > 0) { | ||
950 | xprt->tcp_copied += r; | ||
951 | xprt->tcp_offset += r; | ||
952 | } | ||
953 | if (r != len) { | ||
954 | /* Error when copying to the receive buffer, | ||
955 | * usually because we weren't able to allocate | ||
956 | * additional buffer pages. All we can do now | ||
957 | * is turn off XPRT_COPY_DATA, so the request | ||
958 | * will not receive any additional updates, | ||
959 | * and time out. | ||
960 | * Any remaining data from this record will | ||
961 | * be discarded. | ||
962 | */ | ||
963 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
964 | dprintk("RPC: XID %08x truncated request\n", | ||
965 | ntohl(xprt->tcp_xid)); | ||
966 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
967 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
968 | goto out; | ||
969 | } | ||
970 | |||
971 | dprintk("RPC: XID %08x read %Zd bytes\n", | ||
972 | ntohl(xprt->tcp_xid), r); | ||
973 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
974 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
975 | |||
976 | if (xprt->tcp_copied == req->rq_private_buf.buflen) | ||
977 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
978 | else if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
979 | if (xprt->tcp_flags & XPRT_LAST_FRAG) | ||
980 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
981 | } | ||
982 | |||
983 | out: | ||
984 | if (!(xprt->tcp_flags & XPRT_COPY_DATA)) { | ||
985 | dprintk("RPC: %4d received reply complete\n", | ||
986 | req->rq_task->tk_pid); | ||
987 | xprt_complete_rqst(xprt, req, xprt->tcp_copied); | ||
988 | } | ||
989 | spin_unlock(&xprt->sock_lock); | ||
990 | tcp_check_recm(xprt); | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | * TCP discard extra bytes from a short read | ||
995 | */ | ||
996 | static inline void | ||
997 | tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
998 | { | ||
999 | size_t len; | ||
1000 | |||
1001 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
1002 | if (len > desc->count) | ||
1003 | len = desc->count; | ||
1004 | desc->count -= len; | ||
1005 | desc->offset += len; | ||
1006 | xprt->tcp_offset += len; | ||
1007 | dprintk("RPC: discarded %Zu bytes\n", len); | ||
1008 | tcp_check_recm(xprt); | ||
1009 | } | ||
1010 | |||
1011 | /* | ||
1012 | * TCP record receive routine | ||
1013 | * We first have to grab the record marker, then the XID, then the data. | ||
1014 | */ | 625 | */ |
1015 | static int | 626 | void xprt_update_rtt(struct rpc_task *task) |
1016 | tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, | ||
1017 | unsigned int offset, size_t len) | ||
1018 | { | ||
1019 | struct rpc_xprt *xprt = rd_desc->arg.data; | ||
1020 | skb_reader_t desc = { | ||
1021 | .skb = skb, | ||
1022 | .offset = offset, | ||
1023 | .count = len, | ||
1024 | .csum = 0 | ||
1025 | }; | ||
1026 | |||
1027 | dprintk("RPC: tcp_data_recv\n"); | ||
1028 | do { | ||
1029 | /* Read in a new fragment marker if necessary */ | ||
1030 | /* Can we ever really expect to get completely empty fragments? */ | ||
1031 | if (xprt->tcp_flags & XPRT_COPY_RECM) { | ||
1032 | tcp_read_fraghdr(xprt, &desc); | ||
1033 | continue; | ||
1034 | } | ||
1035 | /* Read in the xid if necessary */ | ||
1036 | if (xprt->tcp_flags & XPRT_COPY_XID) { | ||
1037 | tcp_read_xid(xprt, &desc); | ||
1038 | continue; | ||
1039 | } | ||
1040 | /* Read in the request data */ | ||
1041 | if (xprt->tcp_flags & XPRT_COPY_DATA) { | ||
1042 | tcp_read_request(xprt, &desc); | ||
1043 | continue; | ||
1044 | } | ||
1045 | /* Skip over any trailing bytes on short reads */ | ||
1046 | tcp_read_discard(xprt, &desc); | ||
1047 | } while (desc.count); | ||
1048 | dprintk("RPC: tcp_data_recv done\n"); | ||
1049 | return len - desc.count; | ||
1050 | } | ||
1051 | |||
1052 | static void tcp_data_ready(struct sock *sk, int bytes) | ||
1053 | { | 627 | { |
1054 | struct rpc_xprt *xprt; | 628 | struct rpc_rqst *req = task->tk_rqstp; |
1055 | read_descriptor_t rd_desc; | 629 | struct rpc_rtt *rtt = task->tk_client->cl_rtt; |
1056 | 630 | unsigned timer = task->tk_msg.rpc_proc->p_timer; | |
1057 | read_lock(&sk->sk_callback_lock); | ||
1058 | dprintk("RPC: tcp_data_ready...\n"); | ||
1059 | if (!(xprt = xprt_from_sock(sk))) { | ||
1060 | printk("RPC: tcp_data_ready socket info not found!\n"); | ||
1061 | goto out; | ||
1062 | } | ||
1063 | if (xprt->shutdown) | ||
1064 | goto out; | ||
1065 | |||
1066 | /* We use rd_desc to pass struct xprt to tcp_data_recv */ | ||
1067 | rd_desc.arg.data = xprt; | ||
1068 | rd_desc.count = 65536; | ||
1069 | tcp_read_sock(sk, &rd_desc, tcp_data_recv); | ||
1070 | out: | ||
1071 | read_unlock(&sk->sk_callback_lock); | ||
1072 | } | ||
1073 | |||
1074 | static void | ||
1075 | tcp_state_change(struct sock *sk) | ||
1076 | { | ||
1077 | struct rpc_xprt *xprt; | ||
1078 | 631 | ||
1079 | read_lock(&sk->sk_callback_lock); | 632 | if (timer) { |
1080 | if (!(xprt = xprt_from_sock(sk))) | 633 | if (req->rq_ntrans == 1) |
1081 | goto out; | 634 | rpc_update_rtt(rtt, timer, |
1082 | dprintk("RPC: tcp_state_change client %p...\n", xprt); | 635 | (long)jiffies - req->rq_xtime); |
1083 | dprintk("RPC: state %x conn %d dead %d zapped %d\n", | 636 | rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); |
1084 | sk->sk_state, xprt_connected(xprt), | ||
1085 | sock_flag(sk, SOCK_DEAD), | ||
1086 | sock_flag(sk, SOCK_ZAPPED)); | ||
1087 | |||
1088 | switch (sk->sk_state) { | ||
1089 | case TCP_ESTABLISHED: | ||
1090 | spin_lock_bh(&xprt->sock_lock); | ||
1091 | if (!xprt_test_and_set_connected(xprt)) { | ||
1092 | /* Reset TCP record info */ | ||
1093 | xprt->tcp_offset = 0; | ||
1094 | xprt->tcp_reclen = 0; | ||
1095 | xprt->tcp_copied = 0; | ||
1096 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | ||
1097 | rpc_wake_up(&xprt->pending); | ||
1098 | } | ||
1099 | spin_unlock_bh(&xprt->sock_lock); | ||
1100 | break; | ||
1101 | case TCP_SYN_SENT: | ||
1102 | case TCP_SYN_RECV: | ||
1103 | break; | ||
1104 | default: | ||
1105 | xprt_disconnect(xprt); | ||
1106 | break; | ||
1107 | } | 637 | } |
1108 | out: | ||
1109 | read_unlock(&sk->sk_callback_lock); | ||
1110 | } | 638 | } |
1111 | 639 | ||
1112 | /* | 640 | /** |
1113 | * Called when more output buffer space is available for this socket. | 641 | * xprt_complete_rqst - called when reply processing is complete |
1114 | * We try not to wake our writers until they can make "significant" | 642 | * @task: RPC request that recently completed |
1115 | * progress, otherwise we'll waste resources thrashing sock_sendmsg | 643 | * @copied: actual number of bytes received from the transport |
1116 | * with a bunch of small requests. | 644 | * |
645 | * Caller holds transport lock. | ||
1117 | */ | 646 | */ |
1118 | static void | 647 | void xprt_complete_rqst(struct rpc_task *task, int copied) |
1119 | xprt_write_space(struct sock *sk) | ||
1120 | { | 648 | { |
1121 | struct rpc_xprt *xprt; | 649 | struct rpc_rqst *req = task->tk_rqstp; |
1122 | struct socket *sock; | ||
1123 | |||
1124 | read_lock(&sk->sk_callback_lock); | ||
1125 | if (!(xprt = xprt_from_sock(sk)) || !(sock = sk->sk_socket)) | ||
1126 | goto out; | ||
1127 | if (xprt->shutdown) | ||
1128 | goto out; | ||
1129 | |||
1130 | /* Wait until we have enough socket memory */ | ||
1131 | if (xprt->stream) { | ||
1132 | /* from net/core/stream.c:sk_stream_write_space */ | ||
1133 | if (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)) | ||
1134 | goto out; | ||
1135 | } else { | ||
1136 | /* from net/core/sock.c:sock_def_write_space */ | ||
1137 | if (!sock_writeable(sk)) | ||
1138 | goto out; | ||
1139 | } | ||
1140 | 650 | ||
1141 | if (!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)) | 651 | dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", |
1142 | goto out; | 652 | task->tk_pid, ntohl(req->rq_xid), copied); |
1143 | 653 | ||
1144 | spin_lock_bh(&xprt->sock_lock); | 654 | list_del_init(&req->rq_list); |
1145 | if (xprt->snd_task) | 655 | req->rq_received = req->rq_private_buf.len = copied; |
1146 | rpc_wake_up_task(xprt->snd_task); | 656 | rpc_wake_up_task(task); |
1147 | spin_unlock_bh(&xprt->sock_lock); | ||
1148 | out: | ||
1149 | read_unlock(&sk->sk_callback_lock); | ||
1150 | } | 657 | } |
1151 | 658 | ||
1152 | /* | 659 | static void xprt_timer(struct rpc_task *task) |
1153 | * RPC receive timeout handler. | ||
1154 | */ | ||
1155 | static void | ||
1156 | xprt_timer(struct rpc_task *task) | ||
1157 | { | 660 | { |
1158 | struct rpc_rqst *req = task->tk_rqstp; | 661 | struct rpc_rqst *req = task->tk_rqstp; |
1159 | struct rpc_xprt *xprt = req->rq_xprt; | 662 | struct rpc_xprt *xprt = req->rq_xprt; |
1160 | 663 | ||
1161 | spin_lock(&xprt->sock_lock); | 664 | dprintk("RPC: %4d xprt_timer\n", task->tk_pid); |
1162 | if (req->rq_received) | ||
1163 | goto out; | ||
1164 | |||
1165 | xprt_adjust_cwnd(req->rq_xprt, -ETIMEDOUT); | ||
1166 | __xprt_put_cong(xprt, req); | ||
1167 | 665 | ||
1168 | dprintk("RPC: %4d xprt_timer (%s request)\n", | 666 | spin_lock(&xprt->transport_lock); |
1169 | task->tk_pid, req ? "pending" : "backlogged"); | 667 | if (!req->rq_received) { |
1170 | 668 | if (xprt->ops->timer) | |
1171 | task->tk_status = -ETIMEDOUT; | 669 | xprt->ops->timer(task); |
1172 | out: | 670 | task->tk_status = -ETIMEDOUT; |
671 | } | ||
1173 | task->tk_timeout = 0; | 672 | task->tk_timeout = 0; |
1174 | rpc_wake_up_task(task); | 673 | rpc_wake_up_task(task); |
1175 | spin_unlock(&xprt->sock_lock); | 674 | spin_unlock(&xprt->transport_lock); |
1176 | } | 675 | } |
1177 | 676 | ||
1178 | /* | 677 | /** |
1179 | * Place the actual RPC call. | 678 | * xprt_prepare_transmit - reserve the transport before sending a request |
1180 | * We have to copy the iovec because sendmsg fiddles with its contents. | 679 | * @task: RPC task about to send a request |
680 | * | ||
1181 | */ | 681 | */ |
1182 | int | 682 | int xprt_prepare_transmit(struct rpc_task *task) |
1183 | xprt_prepare_transmit(struct rpc_task *task) | ||
1184 | { | 683 | { |
1185 | struct rpc_rqst *req = task->tk_rqstp; | 684 | struct rpc_rqst *req = task->tk_rqstp; |
1186 | struct rpc_xprt *xprt = req->rq_xprt; | 685 | struct rpc_xprt *xprt = req->rq_xprt; |
@@ -1191,12 +690,12 @@ xprt_prepare_transmit(struct rpc_task *task) | |||
1191 | if (xprt->shutdown) | 690 | if (xprt->shutdown) |
1192 | return -EIO; | 691 | return -EIO; |
1193 | 692 | ||
1194 | spin_lock_bh(&xprt->sock_lock); | 693 | spin_lock_bh(&xprt->transport_lock); |
1195 | if (req->rq_received && !req->rq_bytes_sent) { | 694 | if (req->rq_received && !req->rq_bytes_sent) { |
1196 | err = req->rq_received; | 695 | err = req->rq_received; |
1197 | goto out_unlock; | 696 | goto out_unlock; |
1198 | } | 697 | } |
1199 | if (!__xprt_lock_write(xprt, task)) { | 698 | if (!xprt->ops->reserve_xprt(task)) { |
1200 | err = -EAGAIN; | 699 | err = -EAGAIN; |
1201 | goto out_unlock; | 700 | goto out_unlock; |
1202 | } | 701 | } |
@@ -1206,39 +705,42 @@ xprt_prepare_transmit(struct rpc_task *task) | |||
1206 | goto out_unlock; | 705 | goto out_unlock; |
1207 | } | 706 | } |
1208 | out_unlock: | 707 | out_unlock: |
1209 | spin_unlock_bh(&xprt->sock_lock); | 708 | spin_unlock_bh(&xprt->transport_lock); |
1210 | return err; | 709 | return err; |
1211 | } | 710 | } |
1212 | 711 | ||
1213 | void | 712 | void |
1214 | xprt_transmit(struct rpc_task *task) | 713 | xprt_abort_transmit(struct rpc_task *task) |
714 | { | ||
715 | struct rpc_xprt *xprt = task->tk_xprt; | ||
716 | |||
717 | xprt_release_write(xprt, task); | ||
718 | } | ||
719 | |||
720 | /** | ||
721 | * xprt_transmit - send an RPC request on a transport | ||
722 | * @task: controlling RPC task | ||
723 | * | ||
724 | * We have to copy the iovec because sendmsg fiddles with its contents. | ||
725 | */ | ||
726 | void xprt_transmit(struct rpc_task *task) | ||
1215 | { | 727 | { |
1216 | struct rpc_clnt *clnt = task->tk_client; | ||
1217 | struct rpc_rqst *req = task->tk_rqstp; | 728 | struct rpc_rqst *req = task->tk_rqstp; |
1218 | struct rpc_xprt *xprt = req->rq_xprt; | 729 | struct rpc_xprt *xprt = req->rq_xprt; |
1219 | int status, retry = 0; | 730 | int status; |
1220 | |||
1221 | 731 | ||
1222 | dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); | 732 | dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); |
1223 | 733 | ||
1224 | /* set up everything as needed. */ | ||
1225 | /* Write the record marker */ | ||
1226 | if (xprt->stream) { | ||
1227 | u32 *marker = req->rq_svec[0].iov_base; | ||
1228 | |||
1229 | *marker = htonl(0x80000000|(req->rq_slen-sizeof(*marker))); | ||
1230 | } | ||
1231 | |||
1232 | smp_rmb(); | 734 | smp_rmb(); |
1233 | if (!req->rq_received) { | 735 | if (!req->rq_received) { |
1234 | if (list_empty(&req->rq_list)) { | 736 | if (list_empty(&req->rq_list)) { |
1235 | spin_lock_bh(&xprt->sock_lock); | 737 | spin_lock_bh(&xprt->transport_lock); |
1236 | /* Update the softirq receive buffer */ | 738 | /* Update the softirq receive buffer */ |
1237 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, | 739 | memcpy(&req->rq_private_buf, &req->rq_rcv_buf, |
1238 | sizeof(req->rq_private_buf)); | 740 | sizeof(req->rq_private_buf)); |
1239 | /* Add request to the receive list */ | 741 | /* Add request to the receive list */ |
1240 | list_add_tail(&req->rq_list, &xprt->recv); | 742 | list_add_tail(&req->rq_list, &xprt->recv); |
1241 | spin_unlock_bh(&xprt->sock_lock); | 743 | spin_unlock_bh(&xprt->transport_lock); |
1242 | xprt_reset_majortimeo(req); | 744 | xprt_reset_majortimeo(req); |
1243 | /* Turn off autodisconnect */ | 745 | /* Turn off autodisconnect */ |
1244 | del_singleshot_timer_sync(&xprt->timer); | 746 | del_singleshot_timer_sync(&xprt->timer); |
@@ -1246,40 +748,19 @@ xprt_transmit(struct rpc_task *task) | |||
1246 | } else if (!req->rq_bytes_sent) | 748 | } else if (!req->rq_bytes_sent) |
1247 | return; | 749 | return; |
1248 | 750 | ||
1249 | /* Continue transmitting the packet/record. We must be careful | 751 | status = xprt->ops->send_request(task); |
1250 | * to cope with writespace callbacks arriving _after_ we have | 752 | if (status == 0) { |
1251 | * called xprt_sendmsg(). | 753 | dprintk("RPC: %4d xmit complete\n", task->tk_pid); |
1252 | */ | 754 | spin_lock_bh(&xprt->transport_lock); |
1253 | while (1) { | 755 | xprt->ops->set_retrans_timeout(task); |
1254 | req->rq_xtime = jiffies; | 756 | /* Don't race with disconnect */ |
1255 | status = xprt_sendmsg(xprt, req); | 757 | if (!xprt_connected(xprt)) |
1256 | 758 | task->tk_status = -ENOTCONN; | |
1257 | if (status < 0) | 759 | else if (!req->rq_received) |
1258 | break; | 760 | rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); |
1259 | 761 | xprt->ops->release_xprt(xprt, task); | |
1260 | if (xprt->stream) { | 762 | spin_unlock_bh(&xprt->transport_lock); |
1261 | req->rq_bytes_sent += status; | 763 | return; |
1262 | |||
1263 | /* If we've sent the entire packet, immediately | ||
1264 | * reset the count of bytes sent. */ | ||
1265 | if (req->rq_bytes_sent >= req->rq_slen) { | ||
1266 | req->rq_bytes_sent = 0; | ||
1267 | goto out_receive; | ||
1268 | } | ||
1269 | } else { | ||
1270 | if (status >= req->rq_slen) | ||
1271 | goto out_receive; | ||
1272 | status = -EAGAIN; | ||
1273 | break; | ||
1274 | } | ||
1275 | |||
1276 | dprintk("RPC: %4d xmit incomplete (%d left of %d)\n", | ||
1277 | task->tk_pid, req->rq_slen - req->rq_bytes_sent, | ||
1278 | req->rq_slen); | ||
1279 | |||
1280 | status = -EAGAIN; | ||
1281 | if (retry++ > 50) | ||
1282 | break; | ||
1283 | } | 764 | } |
1284 | 765 | ||
1285 | /* Note: at this point, task->tk_sleeping has not yet been set, | 766 | /* Note: at this point, task->tk_sleeping has not yet been set, |
@@ -1289,60 +770,19 @@ xprt_transmit(struct rpc_task *task) | |||
1289 | task->tk_status = status; | 770 | task->tk_status = status; |
1290 | 771 | ||
1291 | switch (status) { | 772 | switch (status) { |
1292 | case -EAGAIN: | ||
1293 | if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { | ||
1294 | /* Protect against races with xprt_write_space */ | ||
1295 | spin_lock_bh(&xprt->sock_lock); | ||
1296 | /* Don't race with disconnect */ | ||
1297 | if (!xprt_connected(xprt)) | ||
1298 | task->tk_status = -ENOTCONN; | ||
1299 | else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) { | ||
1300 | task->tk_timeout = req->rq_timeout; | ||
1301 | rpc_sleep_on(&xprt->pending, task, NULL, NULL); | ||
1302 | } | ||
1303 | spin_unlock_bh(&xprt->sock_lock); | ||
1304 | return; | ||
1305 | } | ||
1306 | /* Keep holding the socket if it is blocked */ | ||
1307 | rpc_delay(task, HZ>>4); | ||
1308 | return; | ||
1309 | case -ECONNREFUSED: | 773 | case -ECONNREFUSED: |
1310 | task->tk_timeout = RPC_REESTABLISH_TIMEOUT; | ||
1311 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); | 774 | rpc_sleep_on(&xprt->sending, task, NULL, NULL); |
775 | case -EAGAIN: | ||
1312 | case -ENOTCONN: | 776 | case -ENOTCONN: |
1313 | return; | 777 | return; |
1314 | default: | 778 | default: |
1315 | if (xprt->stream) | 779 | break; |
1316 | xprt_disconnect(xprt); | ||
1317 | } | 780 | } |
1318 | xprt_release_write(xprt, task); | 781 | xprt_release_write(xprt, task); |
1319 | return; | 782 | return; |
1320 | out_receive: | ||
1321 | dprintk("RPC: %4d xmit complete\n", task->tk_pid); | ||
1322 | /* Set the task's receive timeout value */ | ||
1323 | spin_lock_bh(&xprt->sock_lock); | ||
1324 | if (!xprt->nocong) { | ||
1325 | int timer = task->tk_msg.rpc_proc->p_timer; | ||
1326 | task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); | ||
1327 | task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer) + req->rq_retries; | ||
1328 | if (task->tk_timeout > xprt->timeout.to_maxval || task->tk_timeout == 0) | ||
1329 | task->tk_timeout = xprt->timeout.to_maxval; | ||
1330 | } else | ||
1331 | task->tk_timeout = req->rq_timeout; | ||
1332 | /* Don't race with disconnect */ | ||
1333 | if (!xprt_connected(xprt)) | ||
1334 | task->tk_status = -ENOTCONN; | ||
1335 | else if (!req->rq_received) | ||
1336 | rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); | ||
1337 | __xprt_release_write(xprt, task); | ||
1338 | spin_unlock_bh(&xprt->sock_lock); | ||
1339 | } | 783 | } |
1340 | 784 | ||
1341 | /* | 785 | static inline void do_xprt_reserve(struct rpc_task *task) |
1342 | * Reserve an RPC call slot. | ||
1343 | */ | ||
1344 | static inline void | ||
1345 | do_xprt_reserve(struct rpc_task *task) | ||
1346 | { | 786 | { |
1347 | struct rpc_xprt *xprt = task->tk_xprt; | 787 | struct rpc_xprt *xprt = task->tk_xprt; |
1348 | 788 | ||
@@ -1362,22 +802,25 @@ do_xprt_reserve(struct rpc_task *task) | |||
1362 | rpc_sleep_on(&xprt->backlog, task, NULL, NULL); | 802 | rpc_sleep_on(&xprt->backlog, task, NULL, NULL); |
1363 | } | 803 | } |
1364 | 804 | ||
1365 | void | 805 | /** |
1366 | xprt_reserve(struct rpc_task *task) | 806 | * xprt_reserve - allocate an RPC request slot |
807 | * @task: RPC task requesting a slot allocation | ||
808 | * | ||
809 | * If no more slots are available, place the task on the transport's | ||
810 | * backlog queue. | ||
811 | */ | ||
812 | void xprt_reserve(struct rpc_task *task) | ||
1367 | { | 813 | { |
1368 | struct rpc_xprt *xprt = task->tk_xprt; | 814 | struct rpc_xprt *xprt = task->tk_xprt; |
1369 | 815 | ||
1370 | task->tk_status = -EIO; | 816 | task->tk_status = -EIO; |
1371 | if (!xprt->shutdown) { | 817 | if (!xprt->shutdown) { |
1372 | spin_lock(&xprt->xprt_lock); | 818 | spin_lock(&xprt->reserve_lock); |
1373 | do_xprt_reserve(task); | 819 | do_xprt_reserve(task); |
1374 | spin_unlock(&xprt->xprt_lock); | 820 | spin_unlock(&xprt->reserve_lock); |
1375 | } | 821 | } |
1376 | } | 822 | } |
1377 | 823 | ||
1378 | /* | ||
1379 | * Allocate a 'unique' XID | ||
1380 | */ | ||
1381 | static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) | 824 | static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) |
1382 | { | 825 | { |
1383 | return xprt->xid++; | 826 | return xprt->xid++; |
@@ -1388,11 +831,7 @@ static inline void xprt_init_xid(struct rpc_xprt *xprt) | |||
1388 | get_random_bytes(&xprt->xid, sizeof(xprt->xid)); | 831 | get_random_bytes(&xprt->xid, sizeof(xprt->xid)); |
1389 | } | 832 | } |
1390 | 833 | ||
1391 | /* | 834 | static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) |
1392 | * Initialize RPC request | ||
1393 | */ | ||
1394 | static void | ||
1395 | xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | ||
1396 | { | 835 | { |
1397 | struct rpc_rqst *req = task->tk_rqstp; | 836 | struct rpc_rqst *req = task->tk_rqstp; |
1398 | 837 | ||
@@ -1400,128 +839,104 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) | |||
1400 | req->rq_task = task; | 839 | req->rq_task = task; |
1401 | req->rq_xprt = xprt; | 840 | req->rq_xprt = xprt; |
1402 | req->rq_xid = xprt_alloc_xid(xprt); | 841 | req->rq_xid = xprt_alloc_xid(xprt); |
842 | req->rq_release_snd_buf = NULL; | ||
1403 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, | 843 | dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, |
1404 | req, ntohl(req->rq_xid)); | 844 | req, ntohl(req->rq_xid)); |
1405 | } | 845 | } |
1406 | 846 | ||
1407 | /* | 847 | /** |
1408 | * Release an RPC call slot | 848 | * xprt_release - release an RPC request slot |
849 | * @task: task which is finished with the slot | ||
850 | * | ||
1409 | */ | 851 | */ |
1410 | void | 852 | void xprt_release(struct rpc_task *task) |
1411 | xprt_release(struct rpc_task *task) | ||
1412 | { | 853 | { |
1413 | struct rpc_xprt *xprt = task->tk_xprt; | 854 | struct rpc_xprt *xprt = task->tk_xprt; |
1414 | struct rpc_rqst *req; | 855 | struct rpc_rqst *req; |
1415 | 856 | ||
1416 | if (!(req = task->tk_rqstp)) | 857 | if (!(req = task->tk_rqstp)) |
1417 | return; | 858 | return; |
1418 | spin_lock_bh(&xprt->sock_lock); | 859 | spin_lock_bh(&xprt->transport_lock); |
1419 | __xprt_release_write(xprt, task); | 860 | xprt->ops->release_xprt(xprt, task); |
1420 | __xprt_put_cong(xprt, req); | 861 | if (xprt->ops->release_request) |
862 | xprt->ops->release_request(task); | ||
1421 | if (!list_empty(&req->rq_list)) | 863 | if (!list_empty(&req->rq_list)) |
1422 | list_del(&req->rq_list); | 864 | list_del(&req->rq_list); |
1423 | xprt->last_used = jiffies; | 865 | xprt->last_used = jiffies; |
1424 | if (list_empty(&xprt->recv) && !xprt->shutdown) | 866 | if (list_empty(&xprt->recv) && !xprt->shutdown) |
1425 | mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); | 867 | mod_timer(&xprt->timer, |
1426 | spin_unlock_bh(&xprt->sock_lock); | 868 | xprt->last_used + xprt->idle_timeout); |
869 | spin_unlock_bh(&xprt->transport_lock); | ||
1427 | task->tk_rqstp = NULL; | 870 | task->tk_rqstp = NULL; |
871 | if (req->rq_release_snd_buf) | ||
872 | req->rq_release_snd_buf(req); | ||
1428 | memset(req, 0, sizeof(*req)); /* mark unused */ | 873 | memset(req, 0, sizeof(*req)); /* mark unused */ |
1429 | 874 | ||
1430 | dprintk("RPC: %4d release request %p\n", task->tk_pid, req); | 875 | dprintk("RPC: %4d release request %p\n", task->tk_pid, req); |
1431 | 876 | ||
1432 | spin_lock(&xprt->xprt_lock); | 877 | spin_lock(&xprt->reserve_lock); |
1433 | list_add(&req->rq_list, &xprt->free); | 878 | list_add(&req->rq_list, &xprt->free); |
1434 | xprt_clear_backlog(xprt); | 879 | rpc_wake_up_next(&xprt->backlog); |
1435 | spin_unlock(&xprt->xprt_lock); | 880 | spin_unlock(&xprt->reserve_lock); |
1436 | } | ||
1437 | |||
1438 | /* | ||
1439 | * Set default timeout parameters | ||
1440 | */ | ||
1441 | static void | ||
1442 | xprt_default_timeout(struct rpc_timeout *to, int proto) | ||
1443 | { | ||
1444 | if (proto == IPPROTO_UDP) | ||
1445 | xprt_set_timeout(to, 5, 5 * HZ); | ||
1446 | else | ||
1447 | xprt_set_timeout(to, 5, 60 * HZ); | ||
1448 | } | 881 | } |
1449 | 882 | ||
1450 | /* | 883 | /** |
1451 | * Set constant timeout | 884 | * xprt_set_timeout - set constant RPC timeout |
885 | * @to: RPC timeout parameters to set up | ||
886 | * @retr: number of retries | ||
887 | * @incr: amount of increase after each retry | ||
888 | * | ||
1452 | */ | 889 | */ |
1453 | void | 890 | void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) |
1454 | xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr) | ||
1455 | { | 891 | { |
1456 | to->to_initval = | 892 | to->to_initval = |
1457 | to->to_increment = incr; | 893 | to->to_increment = incr; |
1458 | to->to_maxval = incr * retr; | 894 | to->to_maxval = to->to_initval + (incr * retr); |
1459 | to->to_retries = retr; | 895 | to->to_retries = retr; |
1460 | to->to_exponential = 0; | 896 | to->to_exponential = 0; |
1461 | } | 897 | } |
1462 | 898 | ||
1463 | unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; | 899 | static struct rpc_xprt *xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) |
1464 | unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; | ||
1465 | |||
1466 | /* | ||
1467 | * Initialize an RPC client | ||
1468 | */ | ||
1469 | static struct rpc_xprt * | ||
1470 | xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) | ||
1471 | { | 900 | { |
901 | int result; | ||
1472 | struct rpc_xprt *xprt; | 902 | struct rpc_xprt *xprt; |
1473 | unsigned int entries; | ||
1474 | size_t slot_table_size; | ||
1475 | struct rpc_rqst *req; | 903 | struct rpc_rqst *req; |
1476 | 904 | ||
1477 | dprintk("RPC: setting up %s transport...\n", | ||
1478 | proto == IPPROTO_UDP? "UDP" : "TCP"); | ||
1479 | |||
1480 | entries = (proto == IPPROTO_TCP)? | ||
1481 | xprt_tcp_slot_table_entries : xprt_udp_slot_table_entries; | ||
1482 | |||
1483 | if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) | 905 | if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) |
1484 | return ERR_PTR(-ENOMEM); | 906 | return ERR_PTR(-ENOMEM); |
1485 | memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ | 907 | memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ |
1486 | xprt->max_reqs = entries; | ||
1487 | slot_table_size = entries * sizeof(xprt->slot[0]); | ||
1488 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1489 | if (xprt->slot == NULL) { | ||
1490 | kfree(xprt); | ||
1491 | return ERR_PTR(-ENOMEM); | ||
1492 | } | ||
1493 | memset(xprt->slot, 0, slot_table_size); | ||
1494 | 908 | ||
1495 | xprt->addr = *ap; | 909 | xprt->addr = *ap; |
1496 | xprt->prot = proto; | 910 | |
1497 | xprt->stream = (proto == IPPROTO_TCP)? 1 : 0; | 911 | switch (proto) { |
1498 | if (xprt->stream) { | 912 | case IPPROTO_UDP: |
1499 | xprt->cwnd = RPC_MAXCWND(xprt); | 913 | result = xs_setup_udp(xprt, to); |
1500 | xprt->nocong = 1; | 914 | break; |
1501 | xprt->max_payload = (1U << 31) - 1; | 915 | case IPPROTO_TCP: |
1502 | } else { | 916 | result = xs_setup_tcp(xprt, to); |
1503 | xprt->cwnd = RPC_INITCWND; | 917 | break; |
1504 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | 918 | default: |
919 | printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", | ||
920 | proto); | ||
921 | result = -EIO; | ||
922 | break; | ||
923 | } | ||
924 | if (result) { | ||
925 | kfree(xprt); | ||
926 | return ERR_PTR(result); | ||
1505 | } | 927 | } |
1506 | spin_lock_init(&xprt->sock_lock); | 928 | |
1507 | spin_lock_init(&xprt->xprt_lock); | 929 | spin_lock_init(&xprt->transport_lock); |
1508 | init_waitqueue_head(&xprt->cong_wait); | 930 | spin_lock_init(&xprt->reserve_lock); |
1509 | 931 | ||
1510 | INIT_LIST_HEAD(&xprt->free); | 932 | INIT_LIST_HEAD(&xprt->free); |
1511 | INIT_LIST_HEAD(&xprt->recv); | 933 | INIT_LIST_HEAD(&xprt->recv); |
1512 | INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); | 934 | INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); |
1513 | INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); | ||
1514 | init_timer(&xprt->timer); | 935 | init_timer(&xprt->timer); |
1515 | xprt->timer.function = xprt_init_autodisconnect; | 936 | xprt->timer.function = xprt_init_autodisconnect; |
1516 | xprt->timer.data = (unsigned long) xprt; | 937 | xprt->timer.data = (unsigned long) xprt; |
1517 | xprt->last_used = jiffies; | 938 | xprt->last_used = jiffies; |
1518 | xprt->port = XPRT_MAX_RESVPORT; | 939 | xprt->cwnd = RPC_INITCWND; |
1519 | |||
1520 | /* Set timeout parameters */ | ||
1521 | if (to) { | ||
1522 | xprt->timeout = *to; | ||
1523 | } else | ||
1524 | xprt_default_timeout(&xprt->timeout, xprt->prot); | ||
1525 | 940 | ||
1526 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); | 941 | rpc_init_wait_queue(&xprt->pending, "xprt_pending"); |
1527 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); | 942 | rpc_init_wait_queue(&xprt->sending, "xprt_sending"); |
@@ -1529,139 +944,25 @@ xprt_setup(int proto, struct sockaddr_in *ap, struct rpc_timeout *to) | |||
1529 | rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); | 944 | rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); |
1530 | 945 | ||
1531 | /* initialize free list */ | 946 | /* initialize free list */ |
1532 | for (req = &xprt->slot[entries-1]; req >= &xprt->slot[0]; req--) | 947 | for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) |
1533 | list_add(&req->rq_list, &xprt->free); | 948 | list_add(&req->rq_list, &xprt->free); |
1534 | 949 | ||
1535 | xprt_init_xid(xprt); | 950 | xprt_init_xid(xprt); |
1536 | 951 | ||
1537 | /* Check whether we want to use a reserved port */ | ||
1538 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1539 | |||
1540 | dprintk("RPC: created transport %p with %u slots\n", xprt, | 952 | dprintk("RPC: created transport %p with %u slots\n", xprt, |
1541 | xprt->max_reqs); | 953 | xprt->max_reqs); |
1542 | 954 | ||
1543 | return xprt; | 955 | return xprt; |
1544 | } | 956 | } |
1545 | 957 | ||
1546 | /* | 958 | /** |
1547 | * Bind to a reserved port | 959 | * xprt_create_proto - create an RPC client transport |
1548 | */ | 960 | * @proto: requested transport protocol |
1549 | static inline int xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | 961 | * @sap: remote peer's address |
1550 | { | 962 | * @to: timeout parameters for new transport |
1551 | struct sockaddr_in myaddr = { | 963 | * |
1552 | .sin_family = AF_INET, | ||
1553 | }; | ||
1554 | int err, port; | ||
1555 | |||
1556 | /* Were we already bound to a given port? Try to reuse it */ | ||
1557 | port = xprt->port; | ||
1558 | do { | ||
1559 | myaddr.sin_port = htons(port); | ||
1560 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | ||
1561 | sizeof(myaddr)); | ||
1562 | if (err == 0) { | ||
1563 | xprt->port = port; | ||
1564 | return 0; | ||
1565 | } | ||
1566 | if (--port == 0) | ||
1567 | port = XPRT_MAX_RESVPORT; | ||
1568 | } while (err == -EADDRINUSE && port != xprt->port); | ||
1569 | |||
1570 | printk("RPC: Can't bind to reserved port (%d).\n", -err); | ||
1571 | return err; | ||
1572 | } | ||
1573 | |||
1574 | static void | ||
1575 | xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock) | ||
1576 | { | ||
1577 | struct sock *sk = sock->sk; | ||
1578 | |||
1579 | if (xprt->inet) | ||
1580 | return; | ||
1581 | |||
1582 | write_lock_bh(&sk->sk_callback_lock); | ||
1583 | sk->sk_user_data = xprt; | ||
1584 | xprt->old_data_ready = sk->sk_data_ready; | ||
1585 | xprt->old_state_change = sk->sk_state_change; | ||
1586 | xprt->old_write_space = sk->sk_write_space; | ||
1587 | if (xprt->prot == IPPROTO_UDP) { | ||
1588 | sk->sk_data_ready = udp_data_ready; | ||
1589 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
1590 | xprt_set_connected(xprt); | ||
1591 | } else { | ||
1592 | tcp_sk(sk)->nonagle = 1; /* disable Nagle's algorithm */ | ||
1593 | sk->sk_data_ready = tcp_data_ready; | ||
1594 | sk->sk_state_change = tcp_state_change; | ||
1595 | xprt_clear_connected(xprt); | ||
1596 | } | ||
1597 | sk->sk_write_space = xprt_write_space; | ||
1598 | |||
1599 | /* Reset to new socket */ | ||
1600 | xprt->sock = sock; | ||
1601 | xprt->inet = sk; | ||
1602 | write_unlock_bh(&sk->sk_callback_lock); | ||
1603 | |||
1604 | return; | ||
1605 | } | ||
1606 | |||
1607 | /* | ||
1608 | * Set socket buffer length | ||
1609 | */ | ||
1610 | void | ||
1611 | xprt_sock_setbufsize(struct rpc_xprt *xprt) | ||
1612 | { | ||
1613 | struct sock *sk = xprt->inet; | ||
1614 | |||
1615 | if (xprt->stream) | ||
1616 | return; | ||
1617 | if (xprt->rcvsize) { | ||
1618 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
1619 | sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; | ||
1620 | } | ||
1621 | if (xprt->sndsize) { | ||
1622 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
1623 | sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; | ||
1624 | sk->sk_write_space(sk); | ||
1625 | } | ||
1626 | } | ||
1627 | |||
1628 | /* | ||
1629 | * Datastream sockets are created here, but xprt_connect will create | ||
1630 | * and connect stream sockets. | ||
1631 | */ | ||
1632 | static struct socket * xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) | ||
1633 | { | ||
1634 | struct socket *sock; | ||
1635 | int type, err; | ||
1636 | |||
1637 | dprintk("RPC: xprt_create_socket(%s %d)\n", | ||
1638 | (proto == IPPROTO_UDP)? "udp" : "tcp", proto); | ||
1639 | |||
1640 | type = (proto == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; | ||
1641 | |||
1642 | if ((err = sock_create_kern(PF_INET, type, proto, &sock)) < 0) { | ||
1643 | printk("RPC: can't create socket (%d).\n", -err); | ||
1644 | return NULL; | ||
1645 | } | ||
1646 | |||
1647 | /* If the caller has the capability, bind to a reserved port */ | ||
1648 | if (resvport && xprt_bindresvport(xprt, sock) < 0) { | ||
1649 | printk("RPC: can't bind to reserved port.\n"); | ||
1650 | goto failed; | ||
1651 | } | ||
1652 | |||
1653 | return sock; | ||
1654 | |||
1655 | failed: | ||
1656 | sock_release(sock); | ||
1657 | return NULL; | ||
1658 | } | ||
1659 | |||
1660 | /* | ||
1661 | * Create an RPC client transport given the protocol and peer address. | ||
1662 | */ | 964 | */ |
1663 | struct rpc_xprt * | 965 | struct rpc_xprt *xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) |
1664 | xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) | ||
1665 | { | 966 | { |
1666 | struct rpc_xprt *xprt; | 967 | struct rpc_xprt *xprt; |
1667 | 968 | ||
@@ -1673,46 +974,26 @@ xprt_create_proto(int proto, struct sockaddr_in *sap, struct rpc_timeout *to) | |||
1673 | return xprt; | 974 | return xprt; |
1674 | } | 975 | } |
1675 | 976 | ||
1676 | /* | 977 | static void xprt_shutdown(struct rpc_xprt *xprt) |
1677 | * Prepare for transport shutdown. | ||
1678 | */ | ||
1679 | static void | ||
1680 | xprt_shutdown(struct rpc_xprt *xprt) | ||
1681 | { | 978 | { |
1682 | xprt->shutdown = 1; | 979 | xprt->shutdown = 1; |
1683 | rpc_wake_up(&xprt->sending); | 980 | rpc_wake_up(&xprt->sending); |
1684 | rpc_wake_up(&xprt->resend); | 981 | rpc_wake_up(&xprt->resend); |
1685 | rpc_wake_up(&xprt->pending); | 982 | xprt_wake_pending_tasks(xprt, -EIO); |
1686 | rpc_wake_up(&xprt->backlog); | 983 | rpc_wake_up(&xprt->backlog); |
1687 | wake_up(&xprt->cong_wait); | ||
1688 | del_timer_sync(&xprt->timer); | 984 | del_timer_sync(&xprt->timer); |
1689 | |||
1690 | /* synchronously wait for connect worker to finish */ | ||
1691 | cancel_delayed_work(&xprt->sock_connect); | ||
1692 | flush_scheduled_work(); | ||
1693 | } | 985 | } |
1694 | 986 | ||
1695 | /* | 987 | /** |
1696 | * Clear the xprt backlog queue | 988 | * xprt_destroy - destroy an RPC transport, killing off all requests. |
1697 | */ | 989 | * @xprt: transport to destroy |
1698 | static int | 990 | * |
1699 | xprt_clear_backlog(struct rpc_xprt *xprt) { | ||
1700 | rpc_wake_up_next(&xprt->backlog); | ||
1701 | wake_up(&xprt->cong_wait); | ||
1702 | return 1; | ||
1703 | } | ||
1704 | |||
1705 | /* | ||
1706 | * Destroy an RPC transport, killing off all requests. | ||
1707 | */ | 991 | */ |
1708 | int | 992 | int xprt_destroy(struct rpc_xprt *xprt) |
1709 | xprt_destroy(struct rpc_xprt *xprt) | ||
1710 | { | 993 | { |
1711 | dprintk("RPC: destroying transport %p\n", xprt); | 994 | dprintk("RPC: destroying transport %p\n", xprt); |
1712 | xprt_shutdown(xprt); | 995 | xprt_shutdown(xprt); |
1713 | xprt_disconnect(xprt); | 996 | xprt->ops->destroy(xprt); |
1714 | xprt_close(xprt); | ||
1715 | kfree(xprt->slot); | ||
1716 | kfree(xprt); | 997 | kfree(xprt); |
1717 | 998 | ||
1718 | return 0; | 999 | return 0; |
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c new file mode 100644 index 000000000000..2e1529217e65 --- /dev/null +++ b/net/sunrpc/xprtsock.c | |||
@@ -0,0 +1,1252 @@ | |||
1 | /* | ||
2 | * linux/net/sunrpc/xprtsock.c | ||
3 | * | ||
4 | * Client-side transport implementation for sockets. | ||
5 | * | ||
6 | * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
7 | * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> | ||
8 | * TCP NFS related read + write fixes | ||
9 | * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> | ||
10 | * | ||
11 | * Rewrite of larges part of the code in order to stabilize TCP stuff. | ||
12 | * Fix behaviour when socket buffer is full. | ||
13 | * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> | ||
14 | * | ||
15 | * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> | ||
16 | */ | ||
17 | |||
18 | #include <linux/types.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <linux/capability.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/pagemap.h> | ||
23 | #include <linux/errno.h> | ||
24 | #include <linux/socket.h> | ||
25 | #include <linux/in.h> | ||
26 | #include <linux/net.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/udp.h> | ||
29 | #include <linux/tcp.h> | ||
30 | #include <linux/sunrpc/clnt.h> | ||
31 | #include <linux/file.h> | ||
32 | |||
33 | #include <net/sock.h> | ||
34 | #include <net/checksum.h> | ||
35 | #include <net/udp.h> | ||
36 | #include <net/tcp.h> | ||
37 | |||
38 | /* | ||
39 | * How many times to try sending a request on a socket before waiting | ||
40 | * for the socket buffer to clear. | ||
41 | */ | ||
42 | #define XS_SENDMSG_RETRY (10U) | ||
43 | |||
44 | /* | ||
45 | * Time out for an RPC UDP socket connect. UDP socket connects are | ||
46 | * synchronous, but we set a timeout anyway in case of resource | ||
47 | * exhaustion on the local host. | ||
48 | */ | ||
49 | #define XS_UDP_CONN_TO (5U * HZ) | ||
50 | |||
51 | /* | ||
52 | * Wait duration for an RPC TCP connection to be established. Solaris | ||
53 | * NFS over TCP uses 60 seconds, for example, which is in line with how | ||
54 | * long a server takes to reboot. | ||
55 | */ | ||
56 | #define XS_TCP_CONN_TO (60U * HZ) | ||
57 | |||
58 | /* | ||
59 | * Wait duration for a reply from the RPC portmapper. | ||
60 | */ | ||
61 | #define XS_BIND_TO (60U * HZ) | ||
62 | |||
63 | /* | ||
64 | * Delay if a UDP socket connect error occurs. This is most likely some | ||
65 | * kind of resource problem on the local host. | ||
66 | */ | ||
67 | #define XS_UDP_REEST_TO (2U * HZ) | ||
68 | |||
69 | /* | ||
70 | * The reestablish timeout allows clients to delay for a bit before attempting | ||
71 | * to reconnect to a server that just dropped our connection. | ||
72 | * | ||
73 | * We implement an exponential backoff when trying to reestablish a TCP | ||
74 | * transport connection with the server. Some servers like to drop a TCP | ||
75 | * connection when they are overworked, so we start with a short timeout and | ||
76 | * increase over time if the server is down or not responding. | ||
77 | */ | ||
78 | #define XS_TCP_INIT_REEST_TO (3U * HZ) | ||
79 | #define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) | ||
80 | |||
81 | /* | ||
82 | * TCP idle timeout; client drops the transport socket if it is idle | ||
83 | * for this long. Note that we also timeout UDP sockets to prevent | ||
84 | * holding port numbers when there is no RPC traffic. | ||
85 | */ | ||
86 | #define XS_IDLE_DISC_TO (5U * 60 * HZ) | ||
87 | |||
88 | #ifdef RPC_DEBUG | ||
89 | # undef RPC_DEBUG_DATA | ||
90 | # define RPCDBG_FACILITY RPCDBG_TRANS | ||
91 | #endif | ||
92 | |||
93 | #ifdef RPC_DEBUG_DATA | ||
94 | static void xs_pktdump(char *msg, u32 *packet, unsigned int count) | ||
95 | { | ||
96 | u8 *buf = (u8 *) packet; | ||
97 | int j; | ||
98 | |||
99 | dprintk("RPC: %s\n", msg); | ||
100 | for (j = 0; j < count && j < 128; j += 4) { | ||
101 | if (!(j & 31)) { | ||
102 | if (j) | ||
103 | dprintk("\n"); | ||
104 | dprintk("0x%04x ", j); | ||
105 | } | ||
106 | dprintk("%02x%02x%02x%02x ", | ||
107 | buf[j], buf[j+1], buf[j+2], buf[j+3]); | ||
108 | } | ||
109 | dprintk("\n"); | ||
110 | } | ||
111 | #else | ||
112 | static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) | ||
113 | { | ||
114 | /* NOP */ | ||
115 | } | ||
116 | #endif | ||
117 | |||
118 | #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) | ||
119 | |||
120 | static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len) | ||
121 | { | ||
122 | struct kvec iov = { | ||
123 | .iov_base = xdr->head[0].iov_base + base, | ||
124 | .iov_len = len - base, | ||
125 | }; | ||
126 | struct msghdr msg = { | ||
127 | .msg_name = addr, | ||
128 | .msg_namelen = addrlen, | ||
129 | .msg_flags = XS_SENDMSG_FLAGS, | ||
130 | }; | ||
131 | |||
132 | if (xdr->len > len) | ||
133 | msg.msg_flags |= MSG_MORE; | ||
134 | |||
135 | if (likely(iov.iov_len)) | ||
136 | return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
137 | return kernel_sendmsg(sock, &msg, NULL, 0, 0); | ||
138 | } | ||
139 | |||
140 | static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len) | ||
141 | { | ||
142 | struct kvec iov = { | ||
143 | .iov_base = xdr->tail[0].iov_base + base, | ||
144 | .iov_len = len - base, | ||
145 | }; | ||
146 | struct msghdr msg = { | ||
147 | .msg_flags = XS_SENDMSG_FLAGS, | ||
148 | }; | ||
149 | |||
150 | return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); | ||
151 | } | ||
152 | |||
153 | /** | ||
154 | * xs_sendpages - write pages directly to a socket | ||
155 | * @sock: socket to send on | ||
156 | * @addr: UDP only -- address of destination | ||
157 | * @addrlen: UDP only -- length of destination address | ||
158 | * @xdr: buffer containing this request | ||
159 | * @base: starting position in the buffer | ||
160 | * | ||
161 | */ | ||
162 | static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) | ||
163 | { | ||
164 | struct page **ppage = xdr->pages; | ||
165 | unsigned int len, pglen = xdr->page_len; | ||
166 | int err, ret = 0; | ||
167 | ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); | ||
168 | |||
169 | if (unlikely(!sock)) | ||
170 | return -ENOTCONN; | ||
171 | |||
172 | clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); | ||
173 | |||
174 | len = xdr->head[0].iov_len; | ||
175 | if (base < len || (addr != NULL && base == 0)) { | ||
176 | err = xs_send_head(sock, addr, addrlen, xdr, base, len); | ||
177 | if (ret == 0) | ||
178 | ret = err; | ||
179 | else if (err > 0) | ||
180 | ret += err; | ||
181 | if (err != (len - base)) | ||
182 | goto out; | ||
183 | base = 0; | ||
184 | } else | ||
185 | base -= len; | ||
186 | |||
187 | if (unlikely(pglen == 0)) | ||
188 | goto copy_tail; | ||
189 | if (unlikely(base >= pglen)) { | ||
190 | base -= pglen; | ||
191 | goto copy_tail; | ||
192 | } | ||
193 | if (base || xdr->page_base) { | ||
194 | pglen -= base; | ||
195 | base += xdr->page_base; | ||
196 | ppage += base >> PAGE_CACHE_SHIFT; | ||
197 | base &= ~PAGE_CACHE_MASK; | ||
198 | } | ||
199 | |||
200 | sendpage = sock->ops->sendpage ? : sock_no_sendpage; | ||
201 | do { | ||
202 | int flags = XS_SENDMSG_FLAGS; | ||
203 | |||
204 | len = PAGE_CACHE_SIZE; | ||
205 | if (base) | ||
206 | len -= base; | ||
207 | if (pglen < len) | ||
208 | len = pglen; | ||
209 | |||
210 | if (pglen != len || xdr->tail[0].iov_len != 0) | ||
211 | flags |= MSG_MORE; | ||
212 | |||
213 | /* Hmm... We might be dealing with highmem pages */ | ||
214 | if (PageHighMem(*ppage)) | ||
215 | sendpage = sock_no_sendpage; | ||
216 | err = sendpage(sock, *ppage, base, len, flags); | ||
217 | if (ret == 0) | ||
218 | ret = err; | ||
219 | else if (err > 0) | ||
220 | ret += err; | ||
221 | if (err != len) | ||
222 | goto out; | ||
223 | base = 0; | ||
224 | ppage++; | ||
225 | } while ((pglen -= len) != 0); | ||
226 | copy_tail: | ||
227 | len = xdr->tail[0].iov_len; | ||
228 | if (base < len) { | ||
229 | err = xs_send_tail(sock, xdr, base, len); | ||
230 | if (ret == 0) | ||
231 | ret = err; | ||
232 | else if (err > 0) | ||
233 | ret += err; | ||
234 | } | ||
235 | out: | ||
236 | return ret; | ||
237 | } | ||
238 | |||
239 | /** | ||
240 | * xs_nospace - place task on wait queue if transmit was incomplete | ||
241 | * @task: task to put to sleep | ||
242 | * | ||
243 | */ | ||
244 | static void xs_nospace(struct rpc_task *task) | ||
245 | { | ||
246 | struct rpc_rqst *req = task->tk_rqstp; | ||
247 | struct rpc_xprt *xprt = req->rq_xprt; | ||
248 | |||
249 | dprintk("RPC: %4d xmit incomplete (%u left of %u)\n", | ||
250 | task->tk_pid, req->rq_slen - req->rq_bytes_sent, | ||
251 | req->rq_slen); | ||
252 | |||
253 | if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) { | ||
254 | /* Protect against races with write_space */ | ||
255 | spin_lock_bh(&xprt->transport_lock); | ||
256 | |||
257 | /* Don't race with disconnect */ | ||
258 | if (!xprt_connected(xprt)) | ||
259 | task->tk_status = -ENOTCONN; | ||
260 | else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags)) | ||
261 | xprt_wait_for_buffer_space(task); | ||
262 | |||
263 | spin_unlock_bh(&xprt->transport_lock); | ||
264 | } else | ||
265 | /* Keep holding the socket if it is blocked */ | ||
266 | rpc_delay(task, HZ>>4); | ||
267 | } | ||
268 | |||
269 | /** | ||
270 | * xs_udp_send_request - write an RPC request to a UDP socket | ||
271 | * @task: address of RPC task that manages the state of an RPC request | ||
272 | * | ||
273 | * Return values: | ||
274 | * 0: The request has been sent | ||
275 | * EAGAIN: The socket was blocked, please call again later to | ||
276 | * complete the request | ||
277 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
278 | * other: Some other error occured, the request was not sent | ||
279 | */ | ||
280 | static int xs_udp_send_request(struct rpc_task *task) | ||
281 | { | ||
282 | struct rpc_rqst *req = task->tk_rqstp; | ||
283 | struct rpc_xprt *xprt = req->rq_xprt; | ||
284 | struct xdr_buf *xdr = &req->rq_snd_buf; | ||
285 | int status; | ||
286 | |||
287 | xs_pktdump("packet data:", | ||
288 | req->rq_svec->iov_base, | ||
289 | req->rq_svec->iov_len); | ||
290 | |||
291 | req->rq_xtime = jiffies; | ||
292 | status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr, | ||
293 | sizeof(xprt->addr), xdr, req->rq_bytes_sent); | ||
294 | |||
295 | dprintk("RPC: xs_udp_send_request(%u) = %d\n", | ||
296 | xdr->len - req->rq_bytes_sent, status); | ||
297 | |||
298 | if (likely(status >= (int) req->rq_slen)) | ||
299 | return 0; | ||
300 | |||
301 | /* Still some bytes left; set up for a retry later. */ | ||
302 | if (status > 0) | ||
303 | status = -EAGAIN; | ||
304 | |||
305 | switch (status) { | ||
306 | case -ENETUNREACH: | ||
307 | case -EPIPE: | ||
308 | case -ECONNREFUSED: | ||
309 | /* When the server has died, an ICMP port unreachable message | ||
310 | * prompts ECONNREFUSED. */ | ||
311 | break; | ||
312 | case -EAGAIN: | ||
313 | xs_nospace(task); | ||
314 | break; | ||
315 | default: | ||
316 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
317 | -status); | ||
318 | break; | ||
319 | } | ||
320 | |||
321 | return status; | ||
322 | } | ||
323 | |||
324 | static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) | ||
325 | { | ||
326 | u32 reclen = buf->len - sizeof(rpc_fraghdr); | ||
327 | rpc_fraghdr *base = buf->head[0].iov_base; | ||
328 | *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); | ||
329 | } | ||
330 | |||
331 | /** | ||
332 | * xs_tcp_send_request - write an RPC request to a TCP socket | ||
333 | * @task: address of RPC task that manages the state of an RPC request | ||
334 | * | ||
335 | * Return values: | ||
336 | * 0: The request has been sent | ||
337 | * EAGAIN: The socket was blocked, please call again later to | ||
338 | * complete the request | ||
339 | * ENOTCONN: Caller needs to invoke connect logic then call again | ||
340 | * other: Some other error occured, the request was not sent | ||
341 | * | ||
342 | * XXX: In the case of soft timeouts, should we eventually give up | ||
343 | * if sendmsg is not able to make progress? | ||
344 | */ | ||
345 | static int xs_tcp_send_request(struct rpc_task *task) | ||
346 | { | ||
347 | struct rpc_rqst *req = task->tk_rqstp; | ||
348 | struct rpc_xprt *xprt = req->rq_xprt; | ||
349 | struct xdr_buf *xdr = &req->rq_snd_buf; | ||
350 | int status, retry = 0; | ||
351 | |||
352 | xs_encode_tcp_record_marker(&req->rq_snd_buf); | ||
353 | |||
354 | xs_pktdump("packet data:", | ||
355 | req->rq_svec->iov_base, | ||
356 | req->rq_svec->iov_len); | ||
357 | |||
358 | /* Continue transmitting the packet/record. We must be careful | ||
359 | * to cope with writespace callbacks arriving _after_ we have | ||
360 | * called sendmsg(). */ | ||
361 | while (1) { | ||
362 | req->rq_xtime = jiffies; | ||
363 | status = xs_sendpages(xprt->sock, NULL, 0, xdr, | ||
364 | req->rq_bytes_sent); | ||
365 | |||
366 | dprintk("RPC: xs_tcp_send_request(%u) = %d\n", | ||
367 | xdr->len - req->rq_bytes_sent, status); | ||
368 | |||
369 | if (unlikely(status < 0)) | ||
370 | break; | ||
371 | |||
372 | /* If we've sent the entire packet, immediately | ||
373 | * reset the count of bytes sent. */ | ||
374 | req->rq_bytes_sent += status; | ||
375 | if (likely(req->rq_bytes_sent >= req->rq_slen)) { | ||
376 | req->rq_bytes_sent = 0; | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | status = -EAGAIN; | ||
381 | if (retry++ > XS_SENDMSG_RETRY) | ||
382 | break; | ||
383 | } | ||
384 | |||
385 | switch (status) { | ||
386 | case -EAGAIN: | ||
387 | xs_nospace(task); | ||
388 | break; | ||
389 | case -ECONNREFUSED: | ||
390 | case -ECONNRESET: | ||
391 | case -ENOTCONN: | ||
392 | case -EPIPE: | ||
393 | status = -ENOTCONN; | ||
394 | break; | ||
395 | default: | ||
396 | dprintk("RPC: sendmsg returned unrecognized error %d\n", | ||
397 | -status); | ||
398 | xprt_disconnect(xprt); | ||
399 | break; | ||
400 | } | ||
401 | |||
402 | return status; | ||
403 | } | ||
404 | |||
405 | /** | ||
406 | * xs_close - close a socket | ||
407 | * @xprt: transport | ||
408 | * | ||
409 | * This is used when all requests are complete; ie, no DRC state remains | ||
410 | * on the server we want to save. | ||
411 | */ | ||
412 | static void xs_close(struct rpc_xprt *xprt) | ||
413 | { | ||
414 | struct socket *sock = xprt->sock; | ||
415 | struct sock *sk = xprt->inet; | ||
416 | |||
417 | if (!sk) | ||
418 | return; | ||
419 | |||
420 | dprintk("RPC: xs_close xprt %p\n", xprt); | ||
421 | |||
422 | write_lock_bh(&sk->sk_callback_lock); | ||
423 | xprt->inet = NULL; | ||
424 | xprt->sock = NULL; | ||
425 | |||
426 | sk->sk_user_data = NULL; | ||
427 | sk->sk_data_ready = xprt->old_data_ready; | ||
428 | sk->sk_state_change = xprt->old_state_change; | ||
429 | sk->sk_write_space = xprt->old_write_space; | ||
430 | write_unlock_bh(&sk->sk_callback_lock); | ||
431 | |||
432 | sk->sk_no_check = 0; | ||
433 | |||
434 | sock_release(sock); | ||
435 | } | ||
436 | |||
437 | /** | ||
438 | * xs_destroy - prepare to shutdown a transport | ||
439 | * @xprt: doomed transport | ||
440 | * | ||
441 | */ | ||
442 | static void xs_destroy(struct rpc_xprt *xprt) | ||
443 | { | ||
444 | dprintk("RPC: xs_destroy xprt %p\n", xprt); | ||
445 | |||
446 | cancel_delayed_work(&xprt->connect_worker); | ||
447 | flush_scheduled_work(); | ||
448 | |||
449 | xprt_disconnect(xprt); | ||
450 | xs_close(xprt); | ||
451 | kfree(xprt->slot); | ||
452 | } | ||
453 | |||
454 | static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) | ||
455 | { | ||
456 | return (struct rpc_xprt *) sk->sk_user_data; | ||
457 | } | ||
458 | |||
459 | /** | ||
460 | * xs_udp_data_ready - "data ready" callback for UDP sockets | ||
461 | * @sk: socket with data to read | ||
462 | * @len: how much data to read | ||
463 | * | ||
464 | */ | ||
465 | static void xs_udp_data_ready(struct sock *sk, int len) | ||
466 | { | ||
467 | struct rpc_task *task; | ||
468 | struct rpc_xprt *xprt; | ||
469 | struct rpc_rqst *rovr; | ||
470 | struct sk_buff *skb; | ||
471 | int err, repsize, copied; | ||
472 | u32 _xid, *xp; | ||
473 | |||
474 | read_lock(&sk->sk_callback_lock); | ||
475 | dprintk("RPC: xs_udp_data_ready...\n"); | ||
476 | if (!(xprt = xprt_from_sock(sk))) | ||
477 | goto out; | ||
478 | |||
479 | if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) | ||
480 | goto out; | ||
481 | |||
482 | if (xprt->shutdown) | ||
483 | goto dropit; | ||
484 | |||
485 | repsize = skb->len - sizeof(struct udphdr); | ||
486 | if (repsize < 4) { | ||
487 | dprintk("RPC: impossible RPC reply size %d!\n", repsize); | ||
488 | goto dropit; | ||
489 | } | ||
490 | |||
491 | /* Copy the XID from the skb... */ | ||
492 | xp = skb_header_pointer(skb, sizeof(struct udphdr), | ||
493 | sizeof(_xid), &_xid); | ||
494 | if (xp == NULL) | ||
495 | goto dropit; | ||
496 | |||
497 | /* Look up and lock the request corresponding to the given XID */ | ||
498 | spin_lock(&xprt->transport_lock); | ||
499 | rovr = xprt_lookup_rqst(xprt, *xp); | ||
500 | if (!rovr) | ||
501 | goto out_unlock; | ||
502 | task = rovr->rq_task; | ||
503 | |||
504 | if ((copied = rovr->rq_private_buf.buflen) > repsize) | ||
505 | copied = repsize; | ||
506 | |||
507 | /* Suck it into the iovec, verify checksum if not done by hw. */ | ||
508 | if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) | ||
509 | goto out_unlock; | ||
510 | |||
511 | /* Something worked... */ | ||
512 | dst_confirm(skb->dst); | ||
513 | |||
514 | xprt_adjust_cwnd(task, copied); | ||
515 | xprt_update_rtt(task); | ||
516 | xprt_complete_rqst(task, copied); | ||
517 | |||
518 | out_unlock: | ||
519 | spin_unlock(&xprt->transport_lock); | ||
520 | dropit: | ||
521 | skb_free_datagram(sk, skb); | ||
522 | out: | ||
523 | read_unlock(&sk->sk_callback_lock); | ||
524 | } | ||
525 | |||
526 | static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len) | ||
527 | { | ||
528 | if (len > desc->count) | ||
529 | len = desc->count; | ||
530 | if (skb_copy_bits(desc->skb, desc->offset, p, len)) { | ||
531 | dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n", | ||
532 | len, desc->count); | ||
533 | return 0; | ||
534 | } | ||
535 | desc->offset += len; | ||
536 | desc->count -= len; | ||
537 | dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n", | ||
538 | len, desc->count); | ||
539 | return len; | ||
540 | } | ||
541 | |||
542 | static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
543 | { | ||
544 | size_t len, used; | ||
545 | char *p; | ||
546 | |||
547 | p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset; | ||
548 | len = sizeof(xprt->tcp_recm) - xprt->tcp_offset; | ||
549 | used = xs_tcp_copy_data(desc, p, len); | ||
550 | xprt->tcp_offset += used; | ||
551 | if (used != len) | ||
552 | return; | ||
553 | |||
554 | xprt->tcp_reclen = ntohl(xprt->tcp_recm); | ||
555 | if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) | ||
556 | xprt->tcp_flags |= XPRT_LAST_FRAG; | ||
557 | else | ||
558 | xprt->tcp_flags &= ~XPRT_LAST_FRAG; | ||
559 | xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; | ||
560 | |||
561 | xprt->tcp_flags &= ~XPRT_COPY_RECM; | ||
562 | xprt->tcp_offset = 0; | ||
563 | |||
564 | /* Sanity check of the record length */ | ||
565 | if (unlikely(xprt->tcp_reclen < 4)) { | ||
566 | dprintk("RPC: invalid TCP record fragment length\n"); | ||
567 | xprt_disconnect(xprt); | ||
568 | return; | ||
569 | } | ||
570 | dprintk("RPC: reading TCP record fragment of length %d\n", | ||
571 | xprt->tcp_reclen); | ||
572 | } | ||
573 | |||
574 | static void xs_tcp_check_recm(struct rpc_xprt *xprt) | ||
575 | { | ||
576 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n", | ||
577 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags); | ||
578 | if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
579 | xprt->tcp_flags |= XPRT_COPY_RECM; | ||
580 | xprt->tcp_offset = 0; | ||
581 | if (xprt->tcp_flags & XPRT_LAST_FRAG) { | ||
582 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
583 | xprt->tcp_flags |= XPRT_COPY_XID; | ||
584 | xprt->tcp_copied = 0; | ||
585 | } | ||
586 | } | ||
587 | } | ||
588 | |||
589 | static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
590 | { | ||
591 | size_t len, used; | ||
592 | char *p; | ||
593 | |||
594 | len = sizeof(xprt->tcp_xid) - xprt->tcp_offset; | ||
595 | dprintk("RPC: reading XID (%Zu bytes)\n", len); | ||
596 | p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset; | ||
597 | used = xs_tcp_copy_data(desc, p, len); | ||
598 | xprt->tcp_offset += used; | ||
599 | if (used != len) | ||
600 | return; | ||
601 | xprt->tcp_flags &= ~XPRT_COPY_XID; | ||
602 | xprt->tcp_flags |= XPRT_COPY_DATA; | ||
603 | xprt->tcp_copied = 4; | ||
604 | dprintk("RPC: reading reply for XID %08x\n", | ||
605 | ntohl(xprt->tcp_xid)); | ||
606 | xs_tcp_check_recm(xprt); | ||
607 | } | ||
608 | |||
609 | static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
610 | { | ||
611 | struct rpc_rqst *req; | ||
612 | struct xdr_buf *rcvbuf; | ||
613 | size_t len; | ||
614 | ssize_t r; | ||
615 | |||
616 | /* Find and lock the request corresponding to this xid */ | ||
617 | spin_lock(&xprt->transport_lock); | ||
618 | req = xprt_lookup_rqst(xprt, xprt->tcp_xid); | ||
619 | if (!req) { | ||
620 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
621 | dprintk("RPC: XID %08x request not found!\n", | ||
622 | ntohl(xprt->tcp_xid)); | ||
623 | spin_unlock(&xprt->transport_lock); | ||
624 | return; | ||
625 | } | ||
626 | |||
627 | rcvbuf = &req->rq_private_buf; | ||
628 | len = desc->count; | ||
629 | if (len > xprt->tcp_reclen - xprt->tcp_offset) { | ||
630 | skb_reader_t my_desc; | ||
631 | |||
632 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
633 | memcpy(&my_desc, desc, sizeof(my_desc)); | ||
634 | my_desc.count = len; | ||
635 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
636 | &my_desc, xs_tcp_copy_data); | ||
637 | desc->count -= r; | ||
638 | desc->offset += r; | ||
639 | } else | ||
640 | r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, | ||
641 | desc, xs_tcp_copy_data); | ||
642 | |||
643 | if (r > 0) { | ||
644 | xprt->tcp_copied += r; | ||
645 | xprt->tcp_offset += r; | ||
646 | } | ||
647 | if (r != len) { | ||
648 | /* Error when copying to the receive buffer, | ||
649 | * usually because we weren't able to allocate | ||
650 | * additional buffer pages. All we can do now | ||
651 | * is turn off XPRT_COPY_DATA, so the request | ||
652 | * will not receive any additional updates, | ||
653 | * and time out. | ||
654 | * Any remaining data from this record will | ||
655 | * be discarded. | ||
656 | */ | ||
657 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
658 | dprintk("RPC: XID %08x truncated request\n", | ||
659 | ntohl(xprt->tcp_xid)); | ||
660 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
661 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
662 | goto out; | ||
663 | } | ||
664 | |||
665 | dprintk("RPC: XID %08x read %Zd bytes\n", | ||
666 | ntohl(xprt->tcp_xid), r); | ||
667 | dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n", | ||
668 | xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen); | ||
669 | |||
670 | if (xprt->tcp_copied == req->rq_private_buf.buflen) | ||
671 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
672 | else if (xprt->tcp_offset == xprt->tcp_reclen) { | ||
673 | if (xprt->tcp_flags & XPRT_LAST_FRAG) | ||
674 | xprt->tcp_flags &= ~XPRT_COPY_DATA; | ||
675 | } | ||
676 | |||
677 | out: | ||
678 | if (!(xprt->tcp_flags & XPRT_COPY_DATA)) | ||
679 | xprt_complete_rqst(req->rq_task, xprt->tcp_copied); | ||
680 | spin_unlock(&xprt->transport_lock); | ||
681 | xs_tcp_check_recm(xprt); | ||
682 | } | ||
683 | |||
684 | static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc) | ||
685 | { | ||
686 | size_t len; | ||
687 | |||
688 | len = xprt->tcp_reclen - xprt->tcp_offset; | ||
689 | if (len > desc->count) | ||
690 | len = desc->count; | ||
691 | desc->count -= len; | ||
692 | desc->offset += len; | ||
693 | xprt->tcp_offset += len; | ||
694 | dprintk("RPC: discarded %Zu bytes\n", len); | ||
695 | xs_tcp_check_recm(xprt); | ||
696 | } | ||
697 | |||
698 | static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) | ||
699 | { | ||
700 | struct rpc_xprt *xprt = rd_desc->arg.data; | ||
701 | skb_reader_t desc = { | ||
702 | .skb = skb, | ||
703 | .offset = offset, | ||
704 | .count = len, | ||
705 | .csum = 0 | ||
706 | }; | ||
707 | |||
708 | dprintk("RPC: xs_tcp_data_recv started\n"); | ||
709 | do { | ||
710 | /* Read in a new fragment marker if necessary */ | ||
711 | /* Can we ever really expect to get completely empty fragments? */ | ||
712 | if (xprt->tcp_flags & XPRT_COPY_RECM) { | ||
713 | xs_tcp_read_fraghdr(xprt, &desc); | ||
714 | continue; | ||
715 | } | ||
716 | /* Read in the xid if necessary */ | ||
717 | if (xprt->tcp_flags & XPRT_COPY_XID) { | ||
718 | xs_tcp_read_xid(xprt, &desc); | ||
719 | continue; | ||
720 | } | ||
721 | /* Read in the request data */ | ||
722 | if (xprt->tcp_flags & XPRT_COPY_DATA) { | ||
723 | xs_tcp_read_request(xprt, &desc); | ||
724 | continue; | ||
725 | } | ||
726 | /* Skip over any trailing bytes on short reads */ | ||
727 | xs_tcp_read_discard(xprt, &desc); | ||
728 | } while (desc.count); | ||
729 | dprintk("RPC: xs_tcp_data_recv done\n"); | ||
730 | return len - desc.count; | ||
731 | } | ||
732 | |||
733 | /** | ||
734 | * xs_tcp_data_ready - "data ready" callback for TCP sockets | ||
735 | * @sk: socket with data to read | ||
736 | * @bytes: how much data to read | ||
737 | * | ||
738 | */ | ||
739 | static void xs_tcp_data_ready(struct sock *sk, int bytes) | ||
740 | { | ||
741 | struct rpc_xprt *xprt; | ||
742 | read_descriptor_t rd_desc; | ||
743 | |||
744 | read_lock(&sk->sk_callback_lock); | ||
745 | dprintk("RPC: xs_tcp_data_ready...\n"); | ||
746 | if (!(xprt = xprt_from_sock(sk))) | ||
747 | goto out; | ||
748 | if (xprt->shutdown) | ||
749 | goto out; | ||
750 | |||
751 | /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ | ||
752 | rd_desc.arg.data = xprt; | ||
753 | rd_desc.count = 65536; | ||
754 | tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); | ||
755 | out: | ||
756 | read_unlock(&sk->sk_callback_lock); | ||
757 | } | ||
758 | |||
759 | /** | ||
760 | * xs_tcp_state_change - callback to handle TCP socket state changes | ||
761 | * @sk: socket whose state has changed | ||
762 | * | ||
763 | */ | ||
764 | static void xs_tcp_state_change(struct sock *sk) | ||
765 | { | ||
766 | struct rpc_xprt *xprt; | ||
767 | |||
768 | read_lock(&sk->sk_callback_lock); | ||
769 | if (!(xprt = xprt_from_sock(sk))) | ||
770 | goto out; | ||
771 | dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); | ||
772 | dprintk("RPC: state %x conn %d dead %d zapped %d\n", | ||
773 | sk->sk_state, xprt_connected(xprt), | ||
774 | sock_flag(sk, SOCK_DEAD), | ||
775 | sock_flag(sk, SOCK_ZAPPED)); | ||
776 | |||
777 | switch (sk->sk_state) { | ||
778 | case TCP_ESTABLISHED: | ||
779 | spin_lock_bh(&xprt->transport_lock); | ||
780 | if (!xprt_test_and_set_connected(xprt)) { | ||
781 | /* Reset TCP record info */ | ||
782 | xprt->tcp_offset = 0; | ||
783 | xprt->tcp_reclen = 0; | ||
784 | xprt->tcp_copied = 0; | ||
785 | xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID; | ||
786 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
787 | xprt_wake_pending_tasks(xprt, 0); | ||
788 | } | ||
789 | spin_unlock_bh(&xprt->transport_lock); | ||
790 | break; | ||
791 | case TCP_SYN_SENT: | ||
792 | case TCP_SYN_RECV: | ||
793 | break; | ||
794 | default: | ||
795 | xprt_disconnect(xprt); | ||
796 | break; | ||
797 | } | ||
798 | out: | ||
799 | read_unlock(&sk->sk_callback_lock); | ||
800 | } | ||
801 | |||
802 | /** | ||
803 | * xs_udp_write_space - callback invoked when socket buffer space | ||
804 | * becomes available | ||
805 | * @sk: socket whose state has changed | ||
806 | * | ||
807 | * Called when more output buffer space is available for this socket. | ||
808 | * We try not to wake our writers until they can make "significant" | ||
809 | * progress, otherwise we'll waste resources thrashing kernel_sendmsg | ||
810 | * with a bunch of small requests. | ||
811 | */ | ||
812 | static void xs_udp_write_space(struct sock *sk) | ||
813 | { | ||
814 | read_lock(&sk->sk_callback_lock); | ||
815 | |||
816 | /* from net/core/sock.c:sock_def_write_space */ | ||
817 | if (sock_writeable(sk)) { | ||
818 | struct socket *sock; | ||
819 | struct rpc_xprt *xprt; | ||
820 | |||
821 | if (unlikely(!(sock = sk->sk_socket))) | ||
822 | goto out; | ||
823 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | ||
824 | goto out; | ||
825 | if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) | ||
826 | goto out; | ||
827 | |||
828 | xprt_write_space(xprt); | ||
829 | } | ||
830 | |||
831 | out: | ||
832 | read_unlock(&sk->sk_callback_lock); | ||
833 | } | ||
834 | |||
835 | /** | ||
836 | * xs_tcp_write_space - callback invoked when socket buffer space | ||
837 | * becomes available | ||
838 | * @sk: socket whose state has changed | ||
839 | * | ||
840 | * Called when more output buffer space is available for this socket. | ||
841 | * We try not to wake our writers until they can make "significant" | ||
842 | * progress, otherwise we'll waste resources thrashing kernel_sendmsg | ||
843 | * with a bunch of small requests. | ||
844 | */ | ||
845 | static void xs_tcp_write_space(struct sock *sk) | ||
846 | { | ||
847 | read_lock(&sk->sk_callback_lock); | ||
848 | |||
849 | /* from net/core/stream.c:sk_stream_write_space */ | ||
850 | if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { | ||
851 | struct socket *sock; | ||
852 | struct rpc_xprt *xprt; | ||
853 | |||
854 | if (unlikely(!(sock = sk->sk_socket))) | ||
855 | goto out; | ||
856 | if (unlikely(!(xprt = xprt_from_sock(sk)))) | ||
857 | goto out; | ||
858 | if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) | ||
859 | goto out; | ||
860 | |||
861 | xprt_write_space(xprt); | ||
862 | } | ||
863 | |||
864 | out: | ||
865 | read_unlock(&sk->sk_callback_lock); | ||
866 | } | ||
867 | |||
868 | static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) | ||
869 | { | ||
870 | struct sock *sk = xprt->inet; | ||
871 | |||
872 | if (xprt->rcvsize) { | ||
873 | sk->sk_userlocks |= SOCK_RCVBUF_LOCK; | ||
874 | sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs * 2; | ||
875 | } | ||
876 | if (xprt->sndsize) { | ||
877 | sk->sk_userlocks |= SOCK_SNDBUF_LOCK; | ||
878 | sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2; | ||
879 | sk->sk_write_space(sk); | ||
880 | } | ||
881 | } | ||
882 | |||
883 | /** | ||
884 | * xs_udp_set_buffer_size - set send and receive limits | ||
885 | * @xprt: generic transport | ||
886 | * @sndsize: requested size of send buffer, in bytes | ||
887 | * @rcvsize: requested size of receive buffer, in bytes | ||
888 | * | ||
889 | * Set socket send and receive buffer size limits. | ||
890 | */ | ||
891 | static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) | ||
892 | { | ||
893 | xprt->sndsize = 0; | ||
894 | if (sndsize) | ||
895 | xprt->sndsize = sndsize + 1024; | ||
896 | xprt->rcvsize = 0; | ||
897 | if (rcvsize) | ||
898 | xprt->rcvsize = rcvsize + 1024; | ||
899 | |||
900 | xs_udp_do_set_buffer_size(xprt); | ||
901 | } | ||
902 | |||
903 | /** | ||
904 | * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport | ||
905 | * @task: task that timed out | ||
906 | * | ||
907 | * Adjust the congestion window after a retransmit timeout has occurred. | ||
908 | */ | ||
909 | static void xs_udp_timer(struct rpc_task *task) | ||
910 | { | ||
911 | xprt_adjust_cwnd(task, -ETIMEDOUT); | ||
912 | } | ||
913 | |||
914 | static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) | ||
915 | { | ||
916 | struct sockaddr_in myaddr = { | ||
917 | .sin_family = AF_INET, | ||
918 | }; | ||
919 | int err; | ||
920 | unsigned short port = xprt->port; | ||
921 | |||
922 | do { | ||
923 | myaddr.sin_port = htons(port); | ||
924 | err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, | ||
925 | sizeof(myaddr)); | ||
926 | if (err == 0) { | ||
927 | xprt->port = port; | ||
928 | dprintk("RPC: xs_bindresvport bound to port %u\n", | ||
929 | port); | ||
930 | return 0; | ||
931 | } | ||
932 | if (port <= xprt_min_resvport) | ||
933 | port = xprt_max_resvport; | ||
934 | else | ||
935 | port--; | ||
936 | } while (err == -EADDRINUSE && port != xprt->port); | ||
937 | |||
938 | dprintk("RPC: can't bind to reserved port (%d).\n", -err); | ||
939 | return err; | ||
940 | } | ||
941 | |||
942 | /** | ||
943 | * xs_udp_connect_worker - set up a UDP socket | ||
944 | * @args: RPC transport to connect | ||
945 | * | ||
946 | * Invoked by a work queue tasklet. | ||
947 | */ | ||
948 | static void xs_udp_connect_worker(void *args) | ||
949 | { | ||
950 | struct rpc_xprt *xprt = (struct rpc_xprt *) args; | ||
951 | struct socket *sock = xprt->sock; | ||
952 | int err, status = -EIO; | ||
953 | |||
954 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
955 | goto out; | ||
956 | |||
957 | dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt); | ||
958 | |||
959 | /* Start by resetting any existing state */ | ||
960 | xs_close(xprt); | ||
961 | |||
962 | if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { | ||
963 | dprintk("RPC: can't create UDP transport socket (%d).\n", -err); | ||
964 | goto out; | ||
965 | } | ||
966 | |||
967 | if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { | ||
968 | sock_release(sock); | ||
969 | goto out; | ||
970 | } | ||
971 | |||
972 | if (!xprt->inet) { | ||
973 | struct sock *sk = sock->sk; | ||
974 | |||
975 | write_lock_bh(&sk->sk_callback_lock); | ||
976 | |||
977 | sk->sk_user_data = xprt; | ||
978 | xprt->old_data_ready = sk->sk_data_ready; | ||
979 | xprt->old_state_change = sk->sk_state_change; | ||
980 | xprt->old_write_space = sk->sk_write_space; | ||
981 | sk->sk_data_ready = xs_udp_data_ready; | ||
982 | sk->sk_write_space = xs_udp_write_space; | ||
983 | sk->sk_no_check = UDP_CSUM_NORCV; | ||
984 | |||
985 | xprt_set_connected(xprt); | ||
986 | |||
987 | /* Reset to new socket */ | ||
988 | xprt->sock = sock; | ||
989 | xprt->inet = sk; | ||
990 | |||
991 | write_unlock_bh(&sk->sk_callback_lock); | ||
992 | } | ||
993 | xs_udp_do_set_buffer_size(xprt); | ||
994 | status = 0; | ||
995 | out: | ||
996 | xprt_wake_pending_tasks(xprt, status); | ||
997 | xprt_clear_connecting(xprt); | ||
998 | } | ||
999 | |||
1000 | /* | ||
1001 | * We need to preserve the port number so the reply cache on the server can | ||
1002 | * find our cached RPC replies when we get around to reconnecting. | ||
1003 | */ | ||
1004 | static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) | ||
1005 | { | ||
1006 | int result; | ||
1007 | struct socket *sock = xprt->sock; | ||
1008 | struct sockaddr any; | ||
1009 | |||
1010 | dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); | ||
1011 | |||
1012 | /* | ||
1013 | * Disconnect the transport socket by doing a connect operation | ||
1014 | * with AF_UNSPEC. This should return immediately... | ||
1015 | */ | ||
1016 | memset(&any, 0, sizeof(any)); | ||
1017 | any.sa_family = AF_UNSPEC; | ||
1018 | result = sock->ops->connect(sock, &any, sizeof(any), 0); | ||
1019 | if (result) | ||
1020 | dprintk("RPC: AF_UNSPEC connect return code %d\n", | ||
1021 | result); | ||
1022 | } | ||
1023 | |||
1024 | /** | ||
1025 | * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint | ||
1026 | * @args: RPC transport to connect | ||
1027 | * | ||
1028 | * Invoked by a work queue tasklet. | ||
1029 | */ | ||
1030 | static void xs_tcp_connect_worker(void *args) | ||
1031 | { | ||
1032 | struct rpc_xprt *xprt = (struct rpc_xprt *)args; | ||
1033 | struct socket *sock = xprt->sock; | ||
1034 | int err, status = -EIO; | ||
1035 | |||
1036 | if (xprt->shutdown || xprt->addr.sin_port == 0) | ||
1037 | goto out; | ||
1038 | |||
1039 | dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt); | ||
1040 | |||
1041 | if (!xprt->sock) { | ||
1042 | /* start from scratch */ | ||
1043 | if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { | ||
1044 | dprintk("RPC: can't create TCP transport socket (%d).\n", -err); | ||
1045 | goto out; | ||
1046 | } | ||
1047 | |||
1048 | if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) { | ||
1049 | sock_release(sock); | ||
1050 | goto out; | ||
1051 | } | ||
1052 | } else | ||
1053 | /* "close" the socket, preserving the local port */ | ||
1054 | xs_tcp_reuse_connection(xprt); | ||
1055 | |||
1056 | if (!xprt->inet) { | ||
1057 | struct sock *sk = sock->sk; | ||
1058 | |||
1059 | write_lock_bh(&sk->sk_callback_lock); | ||
1060 | |||
1061 | sk->sk_user_data = xprt; | ||
1062 | xprt->old_data_ready = sk->sk_data_ready; | ||
1063 | xprt->old_state_change = sk->sk_state_change; | ||
1064 | xprt->old_write_space = sk->sk_write_space; | ||
1065 | sk->sk_data_ready = xs_tcp_data_ready; | ||
1066 | sk->sk_state_change = xs_tcp_state_change; | ||
1067 | sk->sk_write_space = xs_tcp_write_space; | ||
1068 | |||
1069 | /* socket options */ | ||
1070 | sk->sk_userlocks |= SOCK_BINDPORT_LOCK; | ||
1071 | sock_reset_flag(sk, SOCK_LINGER); | ||
1072 | tcp_sk(sk)->linger2 = 0; | ||
1073 | tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; | ||
1074 | |||
1075 | xprt_clear_connected(xprt); | ||
1076 | |||
1077 | /* Reset to new socket */ | ||
1078 | xprt->sock = sock; | ||
1079 | xprt->inet = sk; | ||
1080 | |||
1081 | write_unlock_bh(&sk->sk_callback_lock); | ||
1082 | } | ||
1083 | |||
1084 | /* Tell the socket layer to start connecting... */ | ||
1085 | status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, | ||
1086 | sizeof(xprt->addr), O_NONBLOCK); | ||
1087 | dprintk("RPC: %p connect status %d connected %d sock state %d\n", | ||
1088 | xprt, -status, xprt_connected(xprt), sock->sk->sk_state); | ||
1089 | if (status < 0) { | ||
1090 | switch (status) { | ||
1091 | case -EINPROGRESS: | ||
1092 | case -EALREADY: | ||
1093 | goto out_clear; | ||
1094 | case -ECONNREFUSED: | ||
1095 | case -ECONNRESET: | ||
1096 | /* retry with existing socket, after a delay */ | ||
1097 | break; | ||
1098 | default: | ||
1099 | /* get rid of existing socket, and retry */ | ||
1100 | xs_close(xprt); | ||
1101 | break; | ||
1102 | } | ||
1103 | } | ||
1104 | out: | ||
1105 | xprt_wake_pending_tasks(xprt, status); | ||
1106 | out_clear: | ||
1107 | xprt_clear_connecting(xprt); | ||
1108 | } | ||
1109 | |||
1110 | /** | ||
1111 | * xs_connect - connect a socket to a remote endpoint | ||
1112 | * @task: address of RPC task that manages state of connect request | ||
1113 | * | ||
1114 | * TCP: If the remote end dropped the connection, delay reconnecting. | ||
1115 | * | ||
1116 | * UDP socket connects are synchronous, but we use a work queue anyway | ||
1117 | * to guarantee that even unprivileged user processes can set up a | ||
1118 | * socket on a privileged port. | ||
1119 | * | ||
1120 | * If a UDP socket connect fails, the delay behavior here prevents | ||
1121 | * retry floods (hard mounts). | ||
1122 | */ | ||
1123 | static void xs_connect(struct rpc_task *task) | ||
1124 | { | ||
1125 | struct rpc_xprt *xprt = task->tk_xprt; | ||
1126 | |||
1127 | if (xprt_test_and_set_connecting(xprt)) | ||
1128 | return; | ||
1129 | |||
1130 | if (xprt->sock != NULL) { | ||
1131 | dprintk("RPC: xs_connect delayed xprt %p for %lu seconds\n", | ||
1132 | xprt, xprt->reestablish_timeout / HZ); | ||
1133 | schedule_delayed_work(&xprt->connect_worker, | ||
1134 | xprt->reestablish_timeout); | ||
1135 | xprt->reestablish_timeout <<= 1; | ||
1136 | if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) | ||
1137 | xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; | ||
1138 | } else { | ||
1139 | dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); | ||
1140 | schedule_work(&xprt->connect_worker); | ||
1141 | |||
1142 | /* flush_scheduled_work can sleep... */ | ||
1143 | if (!RPC_IS_ASYNC(task)) | ||
1144 | flush_scheduled_work(); | ||
1145 | } | ||
1146 | } | ||
1147 | |||
1148 | static struct rpc_xprt_ops xs_udp_ops = { | ||
1149 | .set_buffer_size = xs_udp_set_buffer_size, | ||
1150 | .reserve_xprt = xprt_reserve_xprt_cong, | ||
1151 | .release_xprt = xprt_release_xprt_cong, | ||
1152 | .connect = xs_connect, | ||
1153 | .send_request = xs_udp_send_request, | ||
1154 | .set_retrans_timeout = xprt_set_retrans_timeout_rtt, | ||
1155 | .timer = xs_udp_timer, | ||
1156 | .release_request = xprt_release_rqst_cong, | ||
1157 | .close = xs_close, | ||
1158 | .destroy = xs_destroy, | ||
1159 | }; | ||
1160 | |||
1161 | static struct rpc_xprt_ops xs_tcp_ops = { | ||
1162 | .reserve_xprt = xprt_reserve_xprt, | ||
1163 | .release_xprt = xprt_release_xprt, | ||
1164 | .connect = xs_connect, | ||
1165 | .send_request = xs_tcp_send_request, | ||
1166 | .set_retrans_timeout = xprt_set_retrans_timeout_def, | ||
1167 | .close = xs_close, | ||
1168 | .destroy = xs_destroy, | ||
1169 | }; | ||
1170 | |||
1171 | /** | ||
1172 | * xs_setup_udp - Set up transport to use a UDP socket | ||
1173 | * @xprt: transport to set up | ||
1174 | * @to: timeout parameters | ||
1175 | * | ||
1176 | */ | ||
1177 | int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) | ||
1178 | { | ||
1179 | size_t slot_table_size; | ||
1180 | |||
1181 | dprintk("RPC: setting up udp-ipv4 transport...\n"); | ||
1182 | |||
1183 | xprt->max_reqs = xprt_udp_slot_table_entries; | ||
1184 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | ||
1185 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1186 | if (xprt->slot == NULL) | ||
1187 | return -ENOMEM; | ||
1188 | memset(xprt->slot, 0, slot_table_size); | ||
1189 | |||
1190 | xprt->prot = IPPROTO_UDP; | ||
1191 | xprt->port = xprt_max_resvport; | ||
1192 | xprt->tsh_size = 0; | ||
1193 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1194 | /* XXX: header size can vary due to auth type, IPv6, etc. */ | ||
1195 | xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); | ||
1196 | |||
1197 | INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); | ||
1198 | xprt->bind_timeout = XS_BIND_TO; | ||
1199 | xprt->connect_timeout = XS_UDP_CONN_TO; | ||
1200 | xprt->reestablish_timeout = XS_UDP_REEST_TO; | ||
1201 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
1202 | |||
1203 | xprt->ops = &xs_udp_ops; | ||
1204 | |||
1205 | if (to) | ||
1206 | xprt->timeout = *to; | ||
1207 | else | ||
1208 | xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); | ||
1209 | |||
1210 | return 0; | ||
1211 | } | ||
1212 | |||
1213 | /** | ||
1214 | * xs_setup_tcp - Set up transport to use a TCP socket | ||
1215 | * @xprt: transport to set up | ||
1216 | * @to: timeout parameters | ||
1217 | * | ||
1218 | */ | ||
1219 | int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) | ||
1220 | { | ||
1221 | size_t slot_table_size; | ||
1222 | |||
1223 | dprintk("RPC: setting up tcp-ipv4 transport...\n"); | ||
1224 | |||
1225 | xprt->max_reqs = xprt_tcp_slot_table_entries; | ||
1226 | slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]); | ||
1227 | xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); | ||
1228 | if (xprt->slot == NULL) | ||
1229 | return -ENOMEM; | ||
1230 | memset(xprt->slot, 0, slot_table_size); | ||
1231 | |||
1232 | xprt->prot = IPPROTO_TCP; | ||
1233 | xprt->port = xprt_max_resvport; | ||
1234 | xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); | ||
1235 | xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; | ||
1236 | xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; | ||
1237 | |||
1238 | INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); | ||
1239 | xprt->bind_timeout = XS_BIND_TO; | ||
1240 | xprt->connect_timeout = XS_TCP_CONN_TO; | ||
1241 | xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; | ||
1242 | xprt->idle_timeout = XS_IDLE_DISC_TO; | ||
1243 | |||
1244 | xprt->ops = &xs_tcp_ops; | ||
1245 | |||
1246 | if (to) | ||
1247 | xprt->timeout = *to; | ||
1248 | else | ||
1249 | xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); | ||
1250 | |||
1251 | return 0; | ||
1252 | } | ||