aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Howells <dhowells@redhat.com> 2007-05-04 15:41:11 -0400
committer: David S. Miller <davem@davemloft.net> 2007-05-04 15:41:11 -0400
commit: 224711df5c00f7540b89f32a8225866031977f17 (patch)
tree: 7b515b1e0fff3b5c8ffc01400486beb300395f9e
parent: da99f0565477899f08b76ffcb32afbf6fa95d64a (diff)
[AF_RXRPC]: Sort out MTU handling.
Sort out the MTU determination and handling in AF_RXRPC:

 (1) If it's present, parse the additional information supplied by the peer at the end of the ACK packet (struct ackinfo) to determine the MTU sizes that peer is willing to support.

 (2) Initialise the MTU size to that peer from the kernel's routing records.

 (3) Send ACKs rather than ACKALLs as the former carry the additional info, and the latter do not.

 (4) Declare the interface MTU size in outgoing ACKs as the maximum amount of data that can be stuffed into an RxRPC packet without it having to be fragmented to come in through this computer's NIC.

 (5) If sendmsg() is given MSG_MORE then it should allocate an skb of the maximum size rather than one just big enough for the data it's got left to process, on the theory that there is more data to come that it can append to that packet.

     This means, for example, that if AFS does a large StoreData op, all the packets barring the last will be filled to the maximum unfragmented size.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- net/rxrpc/ar-ack.c 80
-rw-r--r-- net/rxrpc/ar-error.c 2
-rw-r--r-- net/rxrpc/ar-output.c 2
-rw-r--r-- net/rxrpc/ar-peer.c 45
4 files changed, 115 insertions, 14 deletions
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index fc07a926df56..657ee69f2133 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -543,6 +543,38 @@ static void rxrpc_zap_tx_window(struct rxrpc_call *call)
543} 543}
544 544
545/* 545/*
546 * process the extra information that may be appended to an ACK packet
547 */
548static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
549 unsigned latest, int nAcks)
550{
551 struct rxrpc_ackinfo ackinfo;
552 struct rxrpc_peer *peer;
553 unsigned mtu;
554
555 if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) {
556 _leave(" [no ackinfo]");
557 return;
558 }
559
560 _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
561 latest,
562 ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU),
563 ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max));
564
565 mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU));
566
567 peer = call->conn->trans->peer;
568 if (mtu < peer->maxdata) {
569 spin_lock_bh(&peer->lock);
570 peer->maxdata = mtu;
571 peer->mtu = mtu + peer->hdrsize;
572 spin_unlock_bh(&peer->lock);
573 _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
574 }
575}
576
577/*
546 * process packets in the reception queue 578 * process packets in the reception queue
547 */ 579 */
548static int rxrpc_process_rx_queue(struct rxrpc_call *call, 580static int rxrpc_process_rx_queue(struct rxrpc_call *call,
@@ -606,6 +638,8 @@ process_further:
606 rxrpc_acks[ack.reason], 638 rxrpc_acks[ack.reason],
607 ack.nAcks); 639 ack.nAcks);
608 640
641 rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks);
642
609 if (ack.reason == RXRPC_ACK_PING) { 643 if (ack.reason == RXRPC_ACK_PING) {
610 _proto("Rx ACK %%%u PING Request", latest); 644 _proto("Rx ACK %%%u PING Request", latest);
611 rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, 645 rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
@@ -801,9 +835,9 @@ void rxrpc_process_call(struct work_struct *work)
801 struct msghdr msg; 835 struct msghdr msg;
802 struct kvec iov[5]; 836 struct kvec iov[5];
803 unsigned long bits; 837 unsigned long bits;
804 __be32 data; 838 __be32 data, pad;
805 size_t len; 839 size_t len;
806 int genbit, loop, nbit, ioc, ret; 840 int genbit, loop, nbit, ioc, ret, mtu;
807 u32 abort_code = RX_PROTOCOL_ERROR; 841 u32 abort_code = RX_PROTOCOL_ERROR;
808 u8 *acks = NULL; 842 u8 *acks = NULL;
809 843
@@ -899,9 +933,30 @@ void rxrpc_process_call(struct work_struct *work)
899 } 933 }
900 934
901 if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) { 935 if (test_bit(RXRPC_CALL_ACK_FINAL, &call->events)) {
902 hdr.type = RXRPC_PACKET_TYPE_ACKALL;
903 genbit = RXRPC_CALL_ACK_FINAL; 936 genbit = RXRPC_CALL_ACK_FINAL;
904 goto send_message; 937
938 ack.bufferSpace = htons(8);
939 ack.maxSkew = 0;
940 ack.serial = 0;
941 ack.reason = RXRPC_ACK_IDLE;
942 ack.nAcks = 0;
943 call->ackr_reason = 0;
944
945 spin_lock_bh(&call->lock);
946 ack.serial = call->ackr_serial;
947 ack.previousPacket = call->ackr_prev_seq;
948 ack.firstPacket = htonl(call->rx_data_eaten + 1);
949 spin_unlock_bh(&call->lock);
950
951 pad = 0;
952
953 iov[1].iov_base = &ack;
954 iov[1].iov_len = sizeof(ack);
955 iov[2].iov_base = &pad;
956 iov[2].iov_len = 3;
957 iov[3].iov_base = &ackinfo;
958 iov[3].iov_len = sizeof(ackinfo);
959 goto send_ACK;
905 } 960 }
906 961
907 if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) | 962 if (call->events & ((1 << RXRPC_CALL_RCVD_BUSY) |
@@ -971,8 +1026,6 @@ void rxrpc_process_call(struct work_struct *work)
971 1026
972 /* consider sending an ordinary ACK */ 1027 /* consider sending an ordinary ACK */
973 if (test_bit(RXRPC_CALL_ACK, &call->events)) { 1028 if (test_bit(RXRPC_CALL_ACK, &call->events)) {
974 __be32 pad;
975
976 _debug("send ACK: window: %d - %d { %lx }", 1029 _debug("send ACK: window: %d - %d { %lx }",
977 call->rx_data_eaten, call->ackr_win_top, 1030 call->rx_data_eaten, call->ackr_win_top,
978 call->ackr_window[0]); 1031 call->ackr_window[0]);
@@ -997,12 +1050,6 @@ void rxrpc_process_call(struct work_struct *work)
997 ack.serial = 0; 1050 ack.serial = 0;
998 ack.reason = 0; 1051 ack.reason = 0;
999 1052
1000 ackinfo.rxMTU = htonl(5692);
1001// ackinfo.rxMTU = htonl(call->conn->trans->peer->maxdata);
1002 ackinfo.maxMTU = htonl(call->conn->trans->peer->maxdata);
1003 ackinfo.rwind = htonl(32);
1004 ackinfo.jumbo_max = htonl(4);
1005
1006 spin_lock_bh(&call->lock); 1053 spin_lock_bh(&call->lock);
1007 ack.reason = call->ackr_reason; 1054 ack.reason = call->ackr_reason;
1008 ack.serial = call->ackr_serial; 1055 ack.serial = call->ackr_serial;
@@ -1116,6 +1163,15 @@ send_ACK_with_skew:
1116 ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) - 1163 ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
1117 ntohl(ack.serial)); 1164 ntohl(ack.serial));
1118send_ACK: 1165send_ACK:
1166 mtu = call->conn->trans->peer->if_mtu;
1167 mtu -= call->conn->trans->peer->hdrsize;
1168 ackinfo.maxMTU = htonl(mtu);
1169 ackinfo.rwind = htonl(32);
1170
1171 /* permit the peer to send us jumbo packets if it wants to */
1172 ackinfo.rxMTU = htonl(5692);
1173 ackinfo.jumbo_max = htonl(4);
1174
1119 hdr.serial = htonl(atomic_inc_return(&call->conn->serial)); 1175 hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
1120 _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", 1176 _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
1121 ntohl(hdr.serial), 1177 ntohl(hdr.serial),
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index 2c27df1ffa17..6cb3e8890e7e 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -100,8 +100,10 @@ void rxrpc_UDP_error_report(struct sock *sk)
100 } 100 }
101 101
102 if (mtu < peer->mtu) { 102 if (mtu < peer->mtu) {
103 spin_lock_bh(&peer->lock);
103 peer->mtu = mtu; 104 peer->mtu = mtu;
104 peer->maxdata = peer->mtu - peer->hdrsize; 105 peer->maxdata = peer->mtu - peer->hdrsize;
106 spin_unlock_bh(&peer->lock);
105 _net("Net MTU %u (maxdata %u)", 107 _net("Net MTU %u (maxdata %u)",
106 peer->mtu, peer->maxdata); 108 peer->mtu, peer->maxdata);
107 } 109 }
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 5cdde4a48ed1..591c4422205e 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -582,7 +582,7 @@ static int rxrpc_send_data(struct kiocb *iocb,
582 max &= ~(call->conn->size_align - 1UL); 582 max &= ~(call->conn->size_align - 1UL);
583 583
584 chunk = max; 584 chunk = max;
585 if (chunk > len) 585 if (chunk > len && !more)
586 chunk = len; 586 chunk = len;
587 587
588 space = chunk + call->conn->size_align; 588 space = chunk + call->conn->size_align;
diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c
index d399de4a7fe2..ce08b78647ce 100644
--- a/net/rxrpc/ar-peer.c
+++ b/net/rxrpc/ar-peer.c
@@ -19,6 +19,7 @@
19#include <net/sock.h> 19#include <net/sock.h>
20#include <net/af_rxrpc.h> 20#include <net/af_rxrpc.h>
21#include <net/ip.h> 21#include <net/ip.h>
22#include <net/route.h>
22#include "ar-internal.h" 23#include "ar-internal.h"
23 24
24static LIST_HEAD(rxrpc_peers); 25static LIST_HEAD(rxrpc_peers);
@@ -28,6 +29,47 @@ static DECLARE_WAIT_QUEUE_HEAD(rxrpc_peer_wq);
28static void rxrpc_destroy_peer(struct work_struct *work); 29static void rxrpc_destroy_peer(struct work_struct *work);
29 30
30/* 31/*
32 * assess the MTU size for the network interface through which this peer is
33 * reached
34 */
35static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
36{
37 struct rtable *rt;
38 struct flowi fl;
39 int ret;
40
41 peer->if_mtu = 1500;
42
43 memset(&fl, 0, sizeof(fl));
44
45 switch (peer->srx.transport.family) {
46 case AF_INET:
47 fl.oif = 0;
48 fl.proto = IPPROTO_UDP,
49 fl.nl_u.ip4_u.saddr = 0;
50 fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr;
51 fl.nl_u.ip4_u.tos = 0;
52 /* assume AFS.CM talking to AFS.FS */
53 fl.uli_u.ports.sport = htons(7001);
54 fl.uli_u.ports.dport = htons(7000);
55 break;
56 default:
57 BUG();
58 }
59
60 ret = ip_route_output_key(&rt, &fl);
61 if (ret < 0) {
62 kleave(" [route err %d]", ret);
63 return;
64 }
65
66 peer->if_mtu = dst_mtu(&rt->u.dst);
67 dst_release(&rt->u.dst);
68
69 kleave(" [if_mtu %u]", peer->if_mtu);
70}
71
72/*
31 * allocate a new peer 73 * allocate a new peer
32 */ 74 */
33static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx, 75static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
@@ -47,7 +89,8 @@ static struct rxrpc_peer *rxrpc_alloc_peer(struct sockaddr_rxrpc *srx,
47 peer->debug_id = atomic_inc_return(&rxrpc_debug_id); 89 peer->debug_id = atomic_inc_return(&rxrpc_debug_id);
48 memcpy(&peer->srx, srx, sizeof(*srx)); 90 memcpy(&peer->srx, srx, sizeof(*srx));
49 91
50 peer->mtu = peer->if_mtu = 65535; 92 rxrpc_assess_MTU_size(peer);
93 peer->mtu = peer->if_mtu;
51 94
52 if (srx->transport.family == AF_INET) { 95 if (srx->transport.family == AF_INET) {
53 peer->hdrsize = sizeof(struct iphdr); 96 peer->hdrsize = sizeof(struct iphdr);