aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/svcsock.c336
1 files changed, 188 insertions, 148 deletions
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b7d435c3f19e..af04f779ce9f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
387 return len; 387 return len;
388} 388}
389 389
390static int svc_partial_recvfrom(struct svc_rqst *rqstp,
391 struct kvec *iov, int nr,
392 int buflen, unsigned int base)
393{
394 size_t save_iovlen;
395 void __user *save_iovbase;
396 unsigned int i;
397 int ret;
398
399 if (base == 0)
400 return svc_recvfrom(rqstp, iov, nr, buflen);
401
402 for (i = 0; i < nr; i++) {
403 if (iov[i].iov_len > base)
404 break;
405 base -= iov[i].iov_len;
406 }
407 save_iovlen = iov[i].iov_len;
408 save_iovbase = iov[i].iov_base;
409 iov[i].iov_len -= base;
410 iov[i].iov_base += base;
411 ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
412 iov[i].iov_len = save_iovlen;
413 iov[i].iov_base = save_iovbase;
414 return ret;
415}
416
390/* 417/*
391 * Set socket snd and rcv buffer lengths 418 * Set socket snd and rcv buffer lengths
392 */ 419 */
@@ -409,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
409 lock_sock(sock->sk); 436 lock_sock(sock->sk);
410 sock->sk->sk_sndbuf = snd * 2; 437 sock->sk->sk_sndbuf = snd * 2;
411 sock->sk->sk_rcvbuf = rcv * 2; 438 sock->sk->sk_rcvbuf = rcv * 2;
412 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
413 sock->sk->sk_write_space(sock->sk); 439 sock->sk->sk_write_space(sock->sk);
414 release_sock(sock->sk); 440 release_sock(sock->sk);
415#endif 441#endif
@@ -884,6 +910,56 @@ failed:
884 return NULL; 910 return NULL;
885} 911}
886 912
913static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
914{
915 unsigned int i, len, npages;
916
917 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
918 return 0;
919 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
920 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
921 for (i = 0; i < npages; i++) {
922 if (rqstp->rq_pages[i] != NULL)
923 put_page(rqstp->rq_pages[i]);
924 BUG_ON(svsk->sk_pages[i] == NULL);
925 rqstp->rq_pages[i] = svsk->sk_pages[i];
926 svsk->sk_pages[i] = NULL;
927 }
928 rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
929 return len;
930}
931
932static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
933{
934 unsigned int i, len, npages;
935
936 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
937 return;
938 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
939 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
940 for (i = 0; i < npages; i++) {
941 svsk->sk_pages[i] = rqstp->rq_pages[i];
942 rqstp->rq_pages[i] = NULL;
943 }
944}
945
946static void svc_tcp_clear_pages(struct svc_sock *svsk)
947{
948 unsigned int i, len, npages;
949
950 if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
951 goto out;
952 len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
953 npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
954 for (i = 0; i < npages; i++) {
955 BUG_ON(svsk->sk_pages[i] == NULL);
956 put_page(svsk->sk_pages[i]);
957 svsk->sk_pages[i] = NULL;
958 }
959out:
960 svsk->sk_tcplen = 0;
961}
962
887/* 963/*
888 * Receive data. 964 * Receive data.
889 * If we haven't gotten the record length yet, get the next four bytes. 965 * If we haven't gotten the record length yet, get the next four bytes.
@@ -893,31 +969,15 @@ failed:
893static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) 969static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
894{ 970{
895 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 971 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
972 unsigned int want;
896 int len; 973 int len;
897 974
898 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
899 /* sndbuf needs to have room for one request
900 * per thread, otherwise we can stall even when the
901 * network isn't a bottleneck.
902 *
903 * We count all threads rather than threads in a
904 * particular pool, which provides an upper bound
905 * on the number of threads which will access the socket.
906 *
907 * rcvbuf just needs to be able to hold a few requests.
908 * Normally they will be removed from the queue
909 * as soon a a complete request arrives.
910 */
911 svc_sock_setbufsize(svsk->sk_sock,
912 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
913 3 * serv->sv_max_mesg);
914
915 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 975 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
916 976
917 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 977 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
918 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
919 struct kvec iov; 978 struct kvec iov;
920 979
980 want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
921 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; 981 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
922 iov.iov_len = want; 982 iov.iov_len = want;
923 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) 983 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
@@ -927,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
927 if (len < want) { 987 if (len < want) {
928 dprintk("svc: short recvfrom while reading record " 988 dprintk("svc: short recvfrom while reading record "
929 "length (%d of %d)\n", len, want); 989 "length (%d of %d)\n", len, want);
930 goto err_again; /* record header not complete */ 990 return -EAGAIN;
931 } 991 }
932 992
933 svsk->sk_reclen = ntohl(svsk->sk_reclen); 993 svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -954,83 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
954 } 1014 }
955 } 1015 }
956 1016
957 /* Check whether enough data is available */ 1017 if (svsk->sk_reclen < 8)
958 len = svc_recv_available(svsk); 1018 goto err_delete; /* client is nuts. */
959 if (len < 0)
960 goto error;
961 1019
962 if (len < svsk->sk_reclen) {
963 dprintk("svc: incomplete TCP record (%d of %d)\n",
964 len, svsk->sk_reclen);
965 goto err_again; /* record not complete */
966 }
967 len = svsk->sk_reclen; 1020 len = svsk->sk_reclen;
968 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
969 1021
970 return len; 1022 return len;
971 error: 1023error:
972 if (len == -EAGAIN) 1024 dprintk("RPC: TCP recv_record got %d\n", len);
973 dprintk("RPC: TCP recv_record got EAGAIN\n");
974 return len; 1025 return len;
975 err_delete: 1026err_delete:
976 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1027 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
977 err_again:
978 return -EAGAIN; 1028 return -EAGAIN;
979} 1029}
980 1030
981static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, 1031static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
982 struct rpc_rqst **reqpp, struct kvec *vec)
983{ 1032{
1033 struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
984 struct rpc_rqst *req = NULL; 1034 struct rpc_rqst *req = NULL;
985 u32 *p; 1035 struct kvec *src, *dst;
986 u32 xid; 1036 __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
987 u32 calldir; 1037 __be32 xid;
988 int len; 1038 __be32 calldir;
989
990 len = svc_recvfrom(rqstp, vec, 1, 8);
991 if (len < 0)
992 goto error;
993 1039
994 p = (u32 *)rqstp->rq_arg.head[0].iov_base;
995 xid = *p++; 1040 xid = *p++;
996 calldir = *p; 1041 calldir = *p;
997 1042
998 if (calldir == 0) { 1043 if (bc_xprt)
999 /* REQUEST is the most common case */ 1044 req = xprt_lookup_rqst(bc_xprt, xid);
1000 vec[0] = rqstp->rq_arg.head[0];
1001 } else {
1002 /* REPLY */
1003 struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
1004
1005 if (bc_xprt)
1006 req = xprt_lookup_rqst(bc_xprt, xid);
1007
1008 if (!req) {
1009 printk(KERN_NOTICE
1010 "%s: Got unrecognized reply: "
1011 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1012 __func__, ntohl(calldir),
1013 bc_xprt, xid);
1014 vec[0] = rqstp->rq_arg.head[0];
1015 goto out;
1016 }
1017 1045
1018 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, 1046 if (!req) {
1019 sizeof(struct xdr_buf)); 1047 printk(KERN_NOTICE
1020 /* copy the xid and call direction */ 1048 "%s: Got unrecognized reply: "
1021 memcpy(req->rq_private_buf.head[0].iov_base, 1049 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1022 rqstp->rq_arg.head[0].iov_base, 8); 1050 __func__, ntohl(calldir),
1023 vec[0] = req->rq_private_buf.head[0]; 1051 bc_xprt, xid);
1052 return -EAGAIN;
1024 } 1053 }
1025 out: 1054
1026 vec[0].iov_base += 8; 1055 memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
1027 vec[0].iov_len -= 8; 1056 /*
1028 len = svsk->sk_reclen - 8; 1057 * XXX!: cheating for now! Only copying HEAD.
1029 error: 1058 * But we know this is good enough for now (in fact, for any
1030 *reqpp = req; 1059 * callback reply in the forseeable future).
1031 return len; 1060 */
1061 dst = &req->rq_private_buf.head[0];
1062 src = &rqstp->rq_arg.head[0];
1063 if (dst->iov_len < src->iov_len)
1064 return -EAGAIN; /* whatever; just giving up. */
1065 memcpy(dst->iov_base, src->iov_base, src->iov_len);
1066 xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
1067 rqstp->rq_arg.len = 0;
1068 return 0;
1032} 1069}
1033 1070
1071static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
1072{
1073 int i = 0;
1074 int t = 0;
1075
1076 while (t < len) {
1077 vec[i].iov_base = page_address(pages[i]);
1078 vec[i].iov_len = PAGE_SIZE;
1079 i++;
1080 t += PAGE_SIZE;
1081 }
1082 return i;
1083}
1084
1085
1034/* 1086/*
1035 * Receive data from a TCP socket. 1087 * Receive data from a TCP socket.
1036 */ 1088 */
@@ -1041,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1041 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 1093 struct svc_serv *serv = svsk->sk_xprt.xpt_server;
1042 int len; 1094 int len;
1043 struct kvec *vec; 1095 struct kvec *vec;
1044 int pnum, vlen; 1096 unsigned int want, base;
1045 struct rpc_rqst *req = NULL; 1097 __be32 *p;
1098 __be32 calldir;
1099 int pnum;
1046 1100
1047 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 1101 dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
1048 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), 1102 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
@@ -1053,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
1053 if (len < 0) 1107 if (len < 0)
1054 goto error; 1108 goto error;
1055 1109
1110 base = svc_tcp_restore_pages(svsk, rqstp);
1111 want = svsk->sk_reclen - base;
1112
1056 vec = rqstp->rq_vec; 1113 vec = rqstp->rq_vec;
1057 vec[0] = rqstp->rq_arg.head[0];
1058 vlen = PAGE_SIZE;
1059 1114
1060 /* 1115 pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
1061 * We have enough data for the whole tcp record. Let's try and read the 1116 svsk->sk_reclen);
1062 * first 8 bytes to get the xid and the call direction. We can use this
1063 * to figure out if this is a call or a reply to a callback. If
1064 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
1065 * In that case, don't bother with the calldir and just read the data.
1066 * It will be rejected in svc_process.
1067 */
1068 if (len >= 8) {
1069 len = svc_process_calldir(svsk, rqstp, &req, vec);
1070 if (len < 0)
1071 goto err_again;
1072 vlen -= 8;
1073 }
1074 1117
1075 pnum = 1;
1076 while (vlen < len) {
1077 vec[pnum].iov_base = (req) ?
1078 page_address(req->rq_private_buf.pages[pnum - 1]) :
1079 page_address(rqstp->rq_pages[pnum]);
1080 vec[pnum].iov_len = PAGE_SIZE;
1081 pnum++;
1082 vlen += PAGE_SIZE;
1083 }
1084 rqstp->rq_respages = &rqstp->rq_pages[pnum]; 1118 rqstp->rq_respages = &rqstp->rq_pages[pnum];
1085 1119
1086 /* Now receive data */ 1120 /* Now receive data */
1087 len = svc_recvfrom(rqstp, vec, pnum, len); 1121 len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
1088 if (len < 0) 1122 if (len >= 0)
1089 goto err_again; 1123 svsk->sk_tcplen += len;
1090 1124 if (len != want) {
1091 /* 1125 if (len < 0 && len != -EAGAIN)
1092 * Account for the 8 bytes we read earlier 1126 goto err_other;
1093 */ 1127 svc_tcp_save_pages(svsk, rqstp);
1094 len += 8; 1128 dprintk("svc: incomplete TCP record (%d of %d)\n",
1095 1129 svsk->sk_tcplen, svsk->sk_reclen);
1096 if (req) { 1130 goto err_noclose;
1097 xprt_complete_rqst(req->rq_task, len);
1098 len = 0;
1099 goto out;
1100 } 1131 }
1101 dprintk("svc: TCP complete record (%d bytes)\n", len); 1132
1102 rqstp->rq_arg.len = len; 1133 rqstp->rq_arg.len = svsk->sk_reclen;
1103 rqstp->rq_arg.page_base = 0; 1134 rqstp->rq_arg.page_base = 0;
1104 if (len <= rqstp->rq_arg.head[0].iov_len) { 1135 if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
1105 rqstp->rq_arg.head[0].iov_len = len; 1136 rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
1106 rqstp->rq_arg.page_len = 0; 1137 rqstp->rq_arg.page_len = 0;
1107 } else { 1138 } else
1108 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 1139 rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
1109 }
1110 1140
1111 rqstp->rq_xprt_ctxt = NULL; 1141 rqstp->rq_xprt_ctxt = NULL;
1112 rqstp->rq_prot = IPPROTO_TCP; 1142 rqstp->rq_prot = IPPROTO_TCP;
1113 1143
1114out: 1144 p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
1145 calldir = p[1];
1146 if (calldir)
1147 len = receive_cb_reply(svsk, rqstp);
1148
1115 /* Reset TCP read info */ 1149 /* Reset TCP read info */
1116 svsk->sk_reclen = 0; 1150 svsk->sk_reclen = 0;
1117 svsk->sk_tcplen = 0; 1151 svsk->sk_tcplen = 0;
1152 /* If we have more data, signal svc_xprt_enqueue() to try again */
1153 if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
1154 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1155
1156 if (len < 0)
1157 goto error;
1118 1158
1119 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); 1159 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
1120 if (serv->sv_stats) 1160 if (serv->sv_stats)
1121 serv->sv_stats->nettcpcnt++; 1161 serv->sv_stats->nettcpcnt++;
1122 1162
1123 return len; 1163 dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
1164 return rqstp->rq_arg.len;
1124 1165
1125err_again:
1126 if (len == -EAGAIN) {
1127 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1128 return len;
1129 }
1130error: 1166error:
1131 if (len != -EAGAIN) { 1167 if (len != -EAGAIN)
1132 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 1168 goto err_other;
1133 svsk->sk_xprt.xpt_server->sv_name, -len); 1169 dprintk("RPC: TCP recvfrom got EAGAIN\n");
1134 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1135 }
1136 return -EAGAIN; 1170 return -EAGAIN;
1171err_other:
1172 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
1173 svsk->sk_xprt.xpt_server->sv_name, -len);
1174 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
1175err_noclose:
1176 return -EAGAIN; /* record not complete */
1137} 1177}
1138 1178
1139/* 1179/*
@@ -1304,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1304 1344
1305 svsk->sk_reclen = 0; 1345 svsk->sk_reclen = 0;
1306 svsk->sk_tcplen = 0; 1346 svsk->sk_tcplen = 0;
1347 memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
1307 1348
1308 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1349 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1309 1350
1310 /* initialise setting must have enough space to
1311 * receive and respond to one request.
1312 * svc_tcp_recvfrom will re-adjust if necessary
1313 */
1314 svc_sock_setbufsize(svsk->sk_sock,
1315 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1316 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1317
1318 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1319 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1351 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1320 if (sk->sk_state != TCP_ESTABLISHED) 1352 if (sk->sk_state != TCP_ESTABLISHED)
1321 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1353 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1379,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1379 /* Initialize the socket */ 1411 /* Initialize the socket */
1380 if (sock->type == SOCK_DGRAM) 1412 if (sock->type == SOCK_DGRAM)
1381 svc_udp_init(svsk, serv); 1413 svc_udp_init(svsk, serv);
1382 else 1414 else {
1415 /* initialise setting must have enough space to
1416 * receive and respond to one request.
1417 */
1418 svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
1419 4 * serv->sv_max_mesg);
1383 svc_tcp_init(svsk, serv); 1420 svc_tcp_init(svsk, serv);
1421 }
1384 1422
1385 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1423 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1386 svsk, svsk->sk_sk); 1424 svsk, svsk->sk_sk);
@@ -1562,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
1562 1600
1563 svc_sock_detach(xprt); 1601 svc_sock_detach(xprt);
1564 1602
1565 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1603 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
1604 svc_tcp_clear_pages(svsk);
1566 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); 1605 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
1606 }
1567} 1607}
1568 1608
1569/* 1609/*