diff options
author | Eric Dumazet <edumazet@google.com> | 2012-04-26 20:38:33 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-04-30 21:35:49 -0400 |
commit | 329033f645d93b5f9160b9b972dbc5431ad22a33 (patch) | |
tree | 1fca09d6bf496391b8ea5d109429e48b28b2d7a8 | |
parent | d7e8883cfcf4851afe74fb380cc62b7fa9cf66ba (diff) |
tcp: makes tcp_try_coalesce aware of skb->head_frag
TCP coalesce can check if skb to be merged has its skb->head mapped to a
page fragment, instead of a kmalloc() area.
We had to disable coalescing in this case, for performance reasons.
We 'upgrade' skb->head as a fragment in itself.
This reduces the number of cache misses when the user makes its copies,
since fewer sk_buffs are fetched.
This makes the receive and ofo queues shorter and thus reduces cache line
misses in the TCP stack.
This is a followup of patch "net: allow skb->head to be a page fragment"
Tested with tg3 nic, with GRO on or off. We can see "TCPRcvCoalesce"
counter being incremented.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Maciej Żenczykowski <maze@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Matt Carlson <mcarlson@broadcom.com>
Cc: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/tcp_input.c | 55 |
1 files changed, 43 insertions, 12 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c93b0cbb7fc1..96a631deb4e6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -4464,10 +4464,12 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | |||
4464 | */ | 4464 | */ |
4465 | static bool tcp_try_coalesce(struct sock *sk, | 4465 | static bool tcp_try_coalesce(struct sock *sk, |
4466 | struct sk_buff *to, | 4466 | struct sk_buff *to, |
4467 | struct sk_buff *from) | 4467 | struct sk_buff *from, |
4468 | bool *fragstolen) | ||
4468 | { | 4469 | { |
4469 | int len = from->len; | 4470 | int delta, len = from->len; |
4470 | 4471 | ||
4472 | *fragstolen = false; | ||
4471 | if (tcp_hdr(from)->fin) | 4473 | if (tcp_hdr(from)->fin) |
4472 | return false; | 4474 | return false; |
4473 | if (len <= skb_tailroom(to)) { | 4475 | if (len <= skb_tailroom(to)) { |
@@ -4478,15 +4480,19 @@ merge: | |||
4478 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; | 4480 | TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; |
4479 | return true; | 4481 | return true; |
4480 | } | 4482 | } |
4483 | |||
4484 | if (skb_has_frag_list(to) || skb_has_frag_list(from)) | ||
4485 | return false; | ||
4486 | |||
4481 | if (skb_headlen(from) == 0 && | 4487 | if (skb_headlen(from) == 0 && |
4482 | !skb_has_frag_list(to) && | ||
4483 | !skb_has_frag_list(from) && | ||
4484 | (skb_shinfo(to)->nr_frags + | 4488 | (skb_shinfo(to)->nr_frags + |
4485 | skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) { | 4489 | skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) { |
4486 | int delta = from->truesize - ksize(from->head) - | 4490 | WARN_ON_ONCE(from->head_frag); |
4487 | SKB_DATA_ALIGN(sizeof(struct sk_buff)); | 4491 | delta = from->truesize - ksize(from->head) - |
4492 | SKB_DATA_ALIGN(sizeof(struct sk_buff)); | ||
4488 | 4493 | ||
4489 | WARN_ON_ONCE(delta < len); | 4494 | WARN_ON_ONCE(delta < len); |
4495 | copyfrags: | ||
4490 | memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, | 4496 | memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, |
4491 | skb_shinfo(from)->frags, | 4497 | skb_shinfo(from)->frags, |
4492 | skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); | 4498 | skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); |
@@ -4499,6 +4505,20 @@ merge: | |||
4499 | to->data_len += len; | 4505 | to->data_len += len; |
4500 | goto merge; | 4506 | goto merge; |
4501 | } | 4507 | } |
4508 | if (from->head_frag) { | ||
4509 | struct page *page; | ||
4510 | unsigned int offset; | ||
4511 | |||
4512 | if (skb_shinfo(to)->nr_frags + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) | ||
4513 | return false; | ||
4514 | page = virt_to_head_page(from->head); | ||
4515 | offset = from->data - (unsigned char *)page_address(page); | ||
4516 | skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, | ||
4517 | page, offset, skb_headlen(from)); | ||
4518 | *fragstolen = true; | ||
4519 | delta = len; /* we dont know real truesize... */ | ||
4520 | goto copyfrags; | ||
4521 | } | ||
4502 | return false; | 4522 | return false; |
4503 | } | 4523 | } |
4504 | 4524 | ||
@@ -4540,10 +4560,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | |||
4540 | end_seq = TCP_SKB_CB(skb)->end_seq; | 4560 | end_seq = TCP_SKB_CB(skb)->end_seq; |
4541 | 4561 | ||
4542 | if (seq == TCP_SKB_CB(skb1)->end_seq) { | 4562 | if (seq == TCP_SKB_CB(skb1)->end_seq) { |
4543 | if (!tcp_try_coalesce(sk, skb1, skb)) { | 4563 | bool fragstolen; |
4564 | |||
4565 | if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { | ||
4544 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | 4566 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4545 | } else { | 4567 | } else { |
4546 | __kfree_skb(skb); | 4568 | if (fragstolen) |
4569 | kmem_cache_free(skbuff_head_cache, skb); | ||
4570 | else | ||
4571 | __kfree_skb(skb); | ||
4547 | skb = NULL; | 4572 | skb = NULL; |
4548 | } | 4573 | } |
4549 | 4574 | ||
@@ -4626,6 +4651,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
4626 | const struct tcphdr *th = tcp_hdr(skb); | 4651 | const struct tcphdr *th = tcp_hdr(skb); |
4627 | struct tcp_sock *tp = tcp_sk(sk); | 4652 | struct tcp_sock *tp = tcp_sk(sk); |
4628 | int eaten = -1; | 4653 | int eaten = -1; |
4654 | bool fragstolen = false; | ||
4629 | 4655 | ||
4630 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) | 4656 | if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) |
4631 | goto drop; | 4657 | goto drop; |
@@ -4672,7 +4698,9 @@ queue_and_out: | |||
4672 | goto drop; | 4698 | goto drop; |
4673 | 4699 | ||
4674 | tail = skb_peek_tail(&sk->sk_receive_queue); | 4700 | tail = skb_peek_tail(&sk->sk_receive_queue); |
4675 | eaten = (tail && tcp_try_coalesce(sk, tail, skb)) ? 1 : 0; | 4701 | eaten = (tail && |
4702 | tcp_try_coalesce(sk, tail, skb, | ||
4703 | &fragstolen)) ? 1 : 0; | ||
4676 | if (eaten <= 0) { | 4704 | if (eaten <= 0) { |
4677 | skb_set_owner_r(skb, sk); | 4705 | skb_set_owner_r(skb, sk); |
4678 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 4706 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
@@ -4699,9 +4727,12 @@ queue_and_out: | |||
4699 | 4727 | ||
4700 | tcp_fast_path_check(sk); | 4728 | tcp_fast_path_check(sk); |
4701 | 4729 | ||
4702 | if (eaten > 0) | 4730 | if (eaten > 0) { |
4703 | __kfree_skb(skb); | 4731 | if (fragstolen) |
4704 | else if (!sock_flag(sk, SOCK_DEAD)) | 4732 | kmem_cache_free(skbuff_head_cache, skb); |
4733 | else | ||
4734 | __kfree_skb(skb); | ||
4735 | } else if (!sock_flag(sk, SOCK_DEAD)) | ||
4705 | sk->sk_data_ready(sk, 0); | 4736 | sk->sk_data_ready(sk, 0); |
4706 | return; | 4737 | return; |
4707 | } | 4738 | } |