aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2014-09-15 14:41:12 -0400
committer David S. Miller <davem@davemloft.net> 2014-09-15 14:41:12 -0400
commit 437024067ac1fbda7bb3a795e75922f9034672fb (patch)
tree c0686708da032f9f6f842903163fa6da23ef649f
parent 13bb5180e82349d9fcfa11cd17b3c1e7b558a902 (diff)
parent b3d6cb92fd190d720a01075c4d20cdca896663fc (diff)
Merge branch 'tcpflags'
Eric Dumazet says:

====================
tcp: no longer keep around headers in input path

Looking at tcp_try_coalesce() I was wondering why I did :

    if (tcp_hdr(from)->fin)
            return false;

The answer would be to allow the aggregation, if we simply OR the FIN and PSH
flags eventually present in @from to @to packet. (Note a change is also
needed in skb_try_coalesce() to avoid calling skb_put() with 0 len)

Then, looking at tcp_recvmsg(), I realized we access tcp_hdr(skb)->syn
(and maybe tcp_hdr(skb)->fin) for every packet we process from socket
receive queue.

We have to understand TCP flags are cold in cpu caches most of the time
(assuming TCP timestamps, and that application calls recvmsg() a long
time after incoming packet was processed), and bringing a whole
cache line only to access one bit is not very nice.

It would make sense to use in TCP input path TCP_SKB_CB(skb)->tcp_flags
as we do in output path. This saves one cache line miss, and TCP
tcp_collapse() can avoid dealing with the headers.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  net/core/skbuff.c     3
-rw-r--r--  net/ipv4/tcp.c        18
-rw-r--r--  net/ipv4/tcp_input.c  31
-rw-r--r--  net/ipv4/tcp_ipv4.c   1
-rw-r--r--  net/ipv6/tcp_ipv6.c   1
5 files changed, 22 insertions, 32 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c8259ac38745..29f7f0121491 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3936,7 +3936,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
3936 return false; 3936 return false;
3937 3937
3938 if (len <= skb_tailroom(to)) { 3938 if (len <= skb_tailroom(to)) {
3939 BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); 3939 if (len)
3940 BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
3940 *delta_truesize = 0; 3941 *delta_truesize = 0;
3941 return true; 3942 return true;
3942 } 3943 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 541f26a67ba2..070aeff1b131 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1510,9 +1510,9 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1510 1510
1511 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { 1511 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
1512 offset = seq - TCP_SKB_CB(skb)->seq; 1512 offset = seq - TCP_SKB_CB(skb)->seq;
1513 if (tcp_hdr(skb)->syn) 1513 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
1514 offset--; 1514 offset--;
1515 if (offset < skb->len || tcp_hdr(skb)->fin) { 1515 if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) {
1516 *off = offset; 1516 *off = offset;
1517 return skb; 1517 return skb;
1518 } 1518 }
@@ -1585,7 +1585,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1585 if (offset + 1 != skb->len) 1585 if (offset + 1 != skb->len)
1586 continue; 1586 continue;
1587 } 1587 }
1588 if (tcp_hdr(skb)->fin) { 1588 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
1589 sk_eat_skb(sk, skb, false); 1589 sk_eat_skb(sk, skb, false);
1590 ++seq; 1590 ++seq;
1591 break; 1591 break;
@@ -1722,11 +1722,11 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1722 break; 1722 break;
1723 1723
1724 offset = *seq - TCP_SKB_CB(skb)->seq; 1724 offset = *seq - TCP_SKB_CB(skb)->seq;
1725 if (tcp_hdr(skb)->syn) 1725 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
1726 offset--; 1726 offset--;
1727 if (offset < skb->len) 1727 if (offset < skb->len)
1728 goto found_ok_skb; 1728 goto found_ok_skb;
1729 if (tcp_hdr(skb)->fin) 1729 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1730 goto found_fin_ok; 1730 goto found_fin_ok;
1731 WARN(!(flags & MSG_PEEK), 1731 WARN(!(flags & MSG_PEEK),
1732 "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n", 1732 "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
@@ -1959,7 +1959,7 @@ skip_copy:
1959 if (used + offset < skb->len) 1959 if (used + offset < skb->len)
1960 continue; 1960 continue;
1961 1961
1962 if (tcp_hdr(skb)->fin) 1962 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1963 goto found_fin_ok; 1963 goto found_fin_ok;
1964 if (!(flags & MSG_PEEK)) { 1964 if (!(flags & MSG_PEEK)) {
1965 sk_eat_skb(sk, skb, copied_early); 1965 sk_eat_skb(sk, skb, copied_early);
@@ -2160,8 +2160,10 @@ void tcp_close(struct sock *sk, long timeout)
2160 * reader process may not have drained the data yet! 2160 * reader process may not have drained the data yet!
2161 */ 2161 */
2162 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 2162 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
2163 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - 2163 u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq;
2164 tcp_hdr(skb)->fin; 2164
2165 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
2166 len--;
2165 data_was_unread += len; 2167 data_was_unread += len;
2166 __kfree_skb(skb); 2168 __kfree_skb(skb);
2167 } 2169 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f97003ad0af5..ea92f23ffaf1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4093,7 +4093,7 @@ static void tcp_ofo_queue(struct sock *sk)
4093 __skb_unlink(skb, &tp->out_of_order_queue); 4093 __skb_unlink(skb, &tp->out_of_order_queue);
4094 __skb_queue_tail(&sk->sk_receive_queue, skb); 4094 __skb_queue_tail(&sk->sk_receive_queue, skb);
4095 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 4095 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
4096 if (tcp_hdr(skb)->fin) 4096 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
4097 tcp_fin(sk); 4097 tcp_fin(sk);
4098 } 4098 }
4099} 4099}
@@ -4143,9 +4143,6 @@ static bool tcp_try_coalesce(struct sock *sk,
4143 4143
4144 *fragstolen = false; 4144 *fragstolen = false;
4145 4145
4146 if (tcp_hdr(from)->fin)
4147 return false;
4148
4149 /* Its possible this segment overlaps with prior segment in queue */ 4146 /* Its possible this segment overlaps with prior segment in queue */
4150 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) 4147 if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
4151 return false; 4148 return false;
@@ -4158,6 +4155,7 @@ static bool tcp_try_coalesce(struct sock *sk,
4158 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE); 4155 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
4159 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; 4156 TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
4160 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; 4157 TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
4158 TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
4161 return true; 4159 return true;
4162} 4160}
4163 4161
@@ -4513,7 +4511,7 @@ restart:
4513 * - bloated or contains data before "start" or 4511 * - bloated or contains data before "start" or
4514 * overlaps to the next one. 4512 * overlaps to the next one.
4515 */ 4513 */
4516 if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin && 4514 if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
4517 (tcp_win_from_space(skb->truesize) > skb->len || 4515 (tcp_win_from_space(skb->truesize) > skb->len ||
4518 before(TCP_SKB_CB(skb)->seq, start))) { 4516 before(TCP_SKB_CB(skb)->seq, start))) {
4519 end_of_skbs = false; 4517 end_of_skbs = false;
@@ -4532,30 +4530,18 @@ restart:
4532 /* Decided to skip this, advance start seq. */ 4530 /* Decided to skip this, advance start seq. */
4533 start = TCP_SKB_CB(skb)->end_seq; 4531 start = TCP_SKB_CB(skb)->end_seq;
4534 } 4532 }
4535 if (end_of_skbs || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) 4533 if (end_of_skbs ||
4534 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4536 return; 4535 return;
4537 4536
4538 while (before(start, end)) { 4537 while (before(start, end)) {
4538 int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
4539 struct sk_buff *nskb; 4539 struct sk_buff *nskb;
4540 unsigned int header = skb_headroom(skb);
4541 int copy = SKB_MAX_ORDER(header, 0);
4542 4540
4543 /* Too big header? This can happen with IPv6. */ 4541 nskb = alloc_skb(copy, GFP_ATOMIC);
4544 if (copy < 0)
4545 return;
4546 if (end - start < copy)
4547 copy = end - start;
4548 nskb = alloc_skb(copy + header, GFP_ATOMIC);
4549 if (!nskb) 4542 if (!nskb)
4550 return; 4543 return;
4551 4544
4552 skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
4553 skb_set_network_header(nskb, (skb_network_header(skb) -
4554 skb->head));
4555 skb_set_transport_header(nskb, (skb_transport_header(skb) -
4556 skb->head));
4557 skb_reserve(nskb, header);
4558 memcpy(nskb->head, skb->head, header);
4559 memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); 4545 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
4560 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; 4546 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
4561 __skb_queue_before(list, skb, nskb); 4547 __skb_queue_before(list, skb, nskb);
@@ -4579,8 +4565,7 @@ restart:
4579 skb = tcp_collapse_one(sk, skb, list); 4565 skb = tcp_collapse_one(sk, skb, list);
4580 if (!skb || 4566 if (!skb ||
4581 skb == tail || 4567 skb == tail ||
4582 tcp_hdr(skb)->syn || 4568 (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
4583 tcp_hdr(skb)->fin)
4584 return; 4569 return;
4585 } 4570 }
4586 } 4571 }
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7881b96d2b72..006b045716d8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1638,6 +1638,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1638 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1638 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1639 skb->len - th->doff * 4); 1639 skb->len - th->doff * 4);
1640 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1640 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1641 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1641 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1642 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1642 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); 1643 TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1643 TCP_SKB_CB(skb)->sacked = 0; 1644 TCP_SKB_CB(skb)->sacked = 0;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1835480336ac..de51a88bec6f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1415,6 +1415,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
1415 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1415 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1416 skb->len - th->doff*4); 1416 skb->len - th->doff*4);
1417 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1417 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1418 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1418 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1419 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1419 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1420 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1420 TCP_SKB_CB(skb)->sacked = 0; 1421 TCP_SKB_CB(skb)->sacked = 0;