aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/tcp.h
diff options
context:
space:
mode:
author: Martin KaFai Lau <kafai@fb.com> 2016-04-25 17:44:48 -0400
committer: David S. Miller <davem@davemloft.net> 2016-04-28 16:14:18 -0400
commit: c134ecb87817ce70fd62b2dc48bb079c44fc08df (patch)
tree: 53328726428d98baa71257d393764ce7013be11b /include/net/tcp.h
parent: 2a9e8438a29c00432ae14eaceb088b965f8ac290 (diff)
tcp: Make use of MSG_EOR in tcp_sendmsg
This patch adds an eor bit to the TCP_SKB_CB. When MSG_EOR is passed to tcp_sendmsg, the eor bit will be set at the skb containing the last byte of the userland's msg. The eor bit will prevent data from appending to that skb in the future.

The change in do_tcp_sendpages is to honor the eor set during the previous tcp_sendmsg(MSG_EOR) call.

This patch handles the tcp_sendmsg case. The followup patches will handle other skb coalescing and fragment cases.

One potential use case is to use MSG_EOR with SOF_TIMESTAMPING_TX_ACK to get a more accurate TCP ack timestamping on application protocol with multiple outgoing response messages (e.g. HTTP2).

Packetdrill script for testing:
~~~~~~
+0 `sysctl -q -w net.ipv4.tcp_min_tso_segs=10`
+0 `sysctl -q -w net.ipv4.tcp_no_metrics_save=1`
+0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0 bind(3, ..., ...) = 0
+0 listen(3, 1) = 0

0.100 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
0.100 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>
0.200 < . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4
+0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0

0.200 write(4, ..., 14600) = 14600
0.200 sendto(4, ..., 730, MSG_EOR, ..., ...) = 730
0.200 sendto(4, ..., 730, MSG_EOR, ..., ...) = 730

0.200 > . 1:7301(7300) ack 1
0.200 > P. 7301:14601(7300) ack 1

0.300 < . 1:1(0) ack 14601 win 257
0.300 > P. 14601:15331(730) ack 1
0.300 > P. 15331:16061(730) ack 1

0.400 < . 1:1(0) ack 16061 win 257
0.400 close(4) = 0
0.400 > F. 16061:16061(0) ack 1
0.400 < F. 1:1(0) ack 16062 win 257
0.400 > . 16062:16062(0) ack 2

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/tcp.h')
-rw-r--r-- include/net/tcp.h | 8
1 files changed, 7 insertions, 1 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 992f317c1abe..24ec80483805 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -761,7 +761,8 @@ struct tcp_skb_cb {
761 761
762 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ 762 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
763 __u8 txstamp_ack:1, /* Record TX timestamp for ack? */ 763 __u8 txstamp_ack:1, /* Record TX timestamp for ack? */
764 unused:7; 764 eor:1, /* Is skb MSG_EOR marked? */
765 unused:6;
765 __u32 ack_seq; /* Sequence number ACK'd */ 766 __u32 ack_seq; /* Sequence number ACK'd */
766 union { 767 union {
767 struct inet_skb_parm h4; 768 struct inet_skb_parm h4;
@@ -808,6 +809,11 @@ static inline int tcp_skb_mss(const struct sk_buff *skb)
808 return TCP_SKB_CB(skb)->tcp_gso_size; 809 return TCP_SKB_CB(skb)->tcp_gso_size;
809} 810}
810 811
812static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
813{
814 return likely(!TCP_SKB_CB(skb)->eor);
815}
816
811/* Events passed to congestion control interface */ 817/* Events passed to congestion control interface */
812enum tcp_ca_event { 818enum tcp_ca_event {
813 CA_EVENT_TX_START, /* first transmit when no packets in flight */ 819 CA_EVENT_TX_START, /* first transmit when no packets in flight */