aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Howells <dhowells@redhat.com> 2018-09-27 10:13:08 -0400
committer: David Howells <dhowells@redhat.com> 2018-09-28 05:32:03 -0400
commit: b604dd9883f783a94020d772e4fe03160f455372 (patch)
tree: 4aea958be93cbd979e2325ac8a2126b836f13f9c
parent: dc71db34e4f3c06b8277c8f3c2ff014610607a8c (diff)
rxrpc: Fix RTT gathering
Fix RTT information gathering in AF_RXRPC by the following means:

 (1) Enable Rx timestamping on the transport socket with SO_TIMESTAMPNS.

 (2) If the sk_buff doesn't have a timestamp set when rxrpc_data_ready()
     collects it, set it at that point.

 (3) Allow ACKs to be requested on the last packet of a client call, but
     not a service call.  We need to be careful lest we undo:

        bf7d620abf22c321208a4da4f435e7af52551a21
        Author: David Howells <dhowells@redhat.com>
        Date: Thu Oct 6 08:11:51 2016 +0100
        rxrpc: Don't request an ACK on the last DATA packet of a call's Tx phase

     but that only really applies to service calls that we're handling,
     since the client side gets to send the final ACK (or not).

 (4) When about to transmit an ACK or DATA packet, record the Tx timestamp
     only before the transmission; don't update the timestamp afterwards.

 (5) Switch the ordering between recording the serial and recording the
     timestamp to always set the serial number first.  The serial number
     shouldn't be seen referenced by an ACK packet until we've transmitted
     the packet bearing it - so in the Rx path, we don't need the timestamp
     until we've checked the serial number.

Fixes: cf1a6474f807 ("rxrpc: Add per-peer RTT tracker")
Signed-off-by: David Howells <dhowells@redhat.com>
-rw-r--r-- net/rxrpc/input.c 8
-rw-r--r-- net/rxrpc/local_object.c 9
-rw-r--r-- net/rxrpc/output.c 31
3 files changed, 33 insertions, 15 deletions
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index ec299c627f77..7f9ed3a60b9a 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -622,13 +622,14 @@ static void rxrpc_input_requested_ack(struct rxrpc_call *call,
622 if (!skb) 622 if (!skb)
623 continue; 623 continue;
624 624
625 sent_at = skb->tstamp;
626 smp_rmb(); /* Read timestamp before serial. */
625 sp = rxrpc_skb(skb); 627 sp = rxrpc_skb(skb);
626 if (sp->hdr.serial != orig_serial) 628 if (sp->hdr.serial != orig_serial)
627 continue; 629 continue;
628 smp_rmb();
629 sent_at = skb->tstamp;
630 goto found; 630 goto found;
631 } 631 }
632
632 return; 633 return;
633 634
634found: 635found:
@@ -1143,6 +1144,9 @@ void rxrpc_data_ready(struct sock *udp_sk)
1143 return; 1144 return;
1144 } 1145 }
1145 1146
1147 if (skb->tstamp == 0)
1148 skb->tstamp = ktime_get_real();
1149
1146 rxrpc_new_skb(skb, rxrpc_skb_rx_received); 1150 rxrpc_new_skb(skb, rxrpc_skb_rx_received);
1147 1151
1148 _net("recv skb %p", skb); 1152 _net("recv skb %p", skb);
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 777c3ed4cfc0..81de7d889ffa 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -173,6 +173,15 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
173 _debug("setsockopt failed"); 173 _debug("setsockopt failed");
174 goto error; 174 goto error;
175 } 175 }
176
177 /* We want receive timestamps. */
178 opt = 1;
179 ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS,
180 (char *)&opt, sizeof(opt));
181 if (ret < 0) {
182 _debug("setsockopt failed");
183 goto error;
184 }
176 break; 185 break;
177 186
178 default: 187 default:
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index ccf5de160444..8a4da3fe96df 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -124,7 +124,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
124 struct kvec iov[2]; 124 struct kvec iov[2];
125 rxrpc_serial_t serial; 125 rxrpc_serial_t serial;
126 rxrpc_seq_t hard_ack, top; 126 rxrpc_seq_t hard_ack, top;
127 ktime_t now;
128 size_t len, n; 127 size_t len, n;
129 int ret; 128 int ret;
130 u8 reason; 129 u8 reason;
@@ -196,9 +195,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
196 /* We need to stick a time in before we send the packet in case 195 /* We need to stick a time in before we send the packet in case
197 * the reply gets back before kernel_sendmsg() completes - but 196 * the reply gets back before kernel_sendmsg() completes - but
198 * asking UDP to send the packet can take a relatively long 197 * asking UDP to send the packet can take a relatively long
199 * time, so we update the time after, on the assumption that 198 * time.
200 * the packet transmission is more likely to happen towards the
201 * end of the kernel_sendmsg() call.
202 */ 199 */
203 call->ping_time = ktime_get_real(); 200 call->ping_time = ktime_get_real();
204 set_bit(RXRPC_CALL_PINGING, &call->flags); 201 set_bit(RXRPC_CALL_PINGING, &call->flags);
@@ -206,9 +203,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping,
206 } 203 }
207 204
208 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); 205 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
209 now = ktime_get_real();
210 if (ping)
211 call->ping_time = now;
212 conn->params.peer->last_tx_at = ktime_get_seconds(); 206 conn->params.peer->last_tx_at = ktime_get_seconds();
213 if (ret < 0) 207 if (ret < 0)
214 trace_rxrpc_tx_fail(call->debug_id, serial, ret, 208 trace_rxrpc_tx_fail(call->debug_id, serial, ret,
@@ -363,8 +357,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
363 357
364 /* If our RTT cache needs working on, request an ACK. Also request 358 /* If our RTT cache needs working on, request an ACK. Also request
365 * ACKs if a DATA packet appears to have been lost. 359 * ACKs if a DATA packet appears to have been lost.
360 *
361 * However, we mustn't request an ACK on the last reply packet of a
362 * service call, lest OpenAFS incorrectly send us an ACK with some
363 * soft-ACKs in it and then never follow up with a proper hard ACK.
366 */ 364 */
367 if (!(sp->hdr.flags & RXRPC_LAST_PACKET) && 365 if ((!(sp->hdr.flags & RXRPC_LAST_PACKET) ||
366 rxrpc_to_server(sp)
367 ) &&
368 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || 368 (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) ||
369 retrans || 369 retrans ||
370 call->cong_mode == RXRPC_CALL_SLOW_START || 370 call->cong_mode == RXRPC_CALL_SLOW_START ||
@@ -390,6 +390,11 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
390 goto send_fragmentable; 390 goto send_fragmentable;
391 391
392 down_read(&conn->params.local->defrag_sem); 392 down_read(&conn->params.local->defrag_sem);
393
394 sp->hdr.serial = serial;
395 smp_wmb(); /* Set serial before timestamp */
396 skb->tstamp = ktime_get_real();
397
393 /* send the packet by UDP 398 /* send the packet by UDP
394 * - returns -EMSGSIZE if UDP would have to fragment the packet 399 * - returns -EMSGSIZE if UDP would have to fragment the packet
395 * to go out of the interface 400 * to go out of the interface
@@ -413,12 +418,8 @@ done:
413 trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, 418 trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
414 retrans, lost); 419 retrans, lost);
415 if (ret >= 0) { 420 if (ret >= 0) {
416 ktime_t now = ktime_get_real();
417 skb->tstamp = now;
418 smp_wmb();
419 sp->hdr.serial = serial;
420 if (whdr.flags & RXRPC_REQUEST_ACK) { 421 if (whdr.flags & RXRPC_REQUEST_ACK) {
421 call->peer->rtt_last_req = now; 422 call->peer->rtt_last_req = skb->tstamp;
422 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); 423 trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
423 if (call->peer->rtt_usage > 1) { 424 if (call->peer->rtt_usage > 1) {
424 unsigned long nowj = jiffies, ack_lost_at; 425 unsigned long nowj = jiffies, ack_lost_at;
@@ -457,6 +458,10 @@ send_fragmentable:
457 458
458 down_write(&conn->params.local->defrag_sem); 459 down_write(&conn->params.local->defrag_sem);
459 460
461 sp->hdr.serial = serial;
462 smp_wmb(); /* Set serial before timestamp */
463 skb->tstamp = ktime_get_real();
464
460 switch (conn->params.local->srx.transport.family) { 465 switch (conn->params.local->srx.transport.family) {
461 case AF_INET: 466 case AF_INET:
462 opt = IP_PMTUDISC_DONT; 467 opt = IP_PMTUDISC_DONT;