diff options
author | Eric Dumazet <edumazet@google.com> | 2012-05-02 03:55:58 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-02 21:11:11 -0400 |
commit | 923dd347b8904c24bcac89bf038ed4da87f8aa90 (patch) | |
tree | d329204cb40e4d13e07ffc538fd9978c334ef90c /net/ipv4/tcp_input.c | |
parent | eeb7fc7bc095546b21188e8e076a59bce73f9ca6 (diff) |
net: take care of cloned skbs in tcp_try_coalesce()
Before stealing fragments or skb head, we must make sure skbs are not
cloned.
Alexander was worried about the destination skb being cloned: in bridge
setups, a driver could be fooled if skb->data_len did not match the
skb's nr_frags.
If the source skb is cloned, we must take references on its pages instead of stealing them.
The bug was triggered when using tcpdump (if not using mmap()).
Introduce a kfree_skb_partial() helper to clean up the code.
Reported-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 42 |
1 files changed, 28 insertions, 14 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7096790e06bf..a8829370f712 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -4532,6 +4532,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | |||
4532 | * @sk: socket | 4532 | * @sk: socket |
4533 | * @to: prior buffer | 4533 | * @to: prior buffer |
4534 | * @from: buffer to add in queue | 4534 | * @from: buffer to add in queue |
4535 | * @fragstolen: pointer to boolean | ||
4535 | * | 4536 | * |
4536 | * Before queueing skb @from after @to, try to merge them | 4537 | * Before queueing skb @from after @to, try to merge them |
4537 | * to reduce overall memory use and queue lengths, if cost is small. | 4538 | * to reduce overall memory use and queue lengths, if cost is small. |
@@ -4544,10 +4545,10 @@ static bool tcp_try_coalesce(struct sock *sk, | |||
4544 | struct sk_buff *from, | 4545 | struct sk_buff *from, |
4545 | bool *fragstolen) | 4546 | bool *fragstolen) |
4546 | { | 4547 | { |
4547 | int delta, len = from->len; | 4548 | int i, delta, len = from->len; |
4548 | 4549 | ||
4549 | *fragstolen = false; | 4550 | *fragstolen = false; |
4550 | if (tcp_hdr(from)->fin) | 4551 | if (tcp_hdr(from)->fin || skb_cloned(to)) |
4551 | return false; | 4552 | return false; |
4552 | if (len <= skb_tailroom(to)) { | 4553 | if (len <= skb_tailroom(to)) { |
4553 | BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); | 4554 | BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); |
@@ -4574,7 +4575,13 @@ copyfrags: | |||
4574 | skb_shinfo(from)->frags, | 4575 | skb_shinfo(from)->frags, |
4575 | skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); | 4576 | skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); |
4576 | skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; | 4577 | skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; |
4577 | skb_shinfo(from)->nr_frags = 0; | 4578 | |
4579 | if (skb_cloned(from)) | ||
4580 | for (i = 0; i < skb_shinfo(from)->nr_frags; i++) | ||
4581 | skb_frag_ref(from, i); | ||
4582 | else | ||
4583 | skb_shinfo(from)->nr_frags = 0; | ||
4584 | |||
4578 | to->truesize += delta; | 4585 | to->truesize += delta; |
4579 | atomic_add(delta, &sk->sk_rmem_alloc); | 4586 | atomic_add(delta, &sk->sk_rmem_alloc); |
4580 | sk_mem_charge(sk, delta); | 4587 | sk_mem_charge(sk, delta); |
@@ -4592,13 +4599,26 @@ copyfrags: | |||
4592 | offset = from->data - (unsigned char *)page_address(page); | 4599 | offset = from->data - (unsigned char *)page_address(page); |
4593 | skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, | 4600 | skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, |
4594 | page, offset, skb_headlen(from)); | 4601 | page, offset, skb_headlen(from)); |
4595 | *fragstolen = true; | 4602 | |
4603 | if (skb_cloned(from)) | ||
4604 | get_page(page); | ||
4605 | else | ||
4606 | *fragstolen = true; | ||
4607 | |||
4596 | delta = len; /* we dont know real truesize... */ | 4608 | delta = len; /* we dont know real truesize... */ |
4597 | goto copyfrags; | 4609 | goto copyfrags; |
4598 | } | 4610 | } |
4599 | return false; | 4611 | return false; |
4600 | } | 4612 | } |
4601 | 4613 | ||
4614 | static void kfree_skb_partial(struct sk_buff *skb, bool head_stolen) | ||
4615 | { | ||
4616 | if (head_stolen) | ||
4617 | kmem_cache_free(skbuff_head_cache, skb); | ||
4618 | else | ||
4619 | __kfree_skb(skb); | ||
4620 | } | ||
4621 | |||
4602 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | 4622 | static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) |
4603 | { | 4623 | { |
4604 | struct tcp_sock *tp = tcp_sk(sk); | 4624 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -4642,10 +4662,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) | |||
4642 | if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { | 4662 | if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { |
4643 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); | 4663 | __skb_queue_after(&tp->out_of_order_queue, skb1, skb); |
4644 | } else { | 4664 | } else { |
4645 | if (fragstolen) | 4665 | kfree_skb_partial(skb, fragstolen); |
4646 | kmem_cache_free(skbuff_head_cache, skb); | ||
4647 | else | ||
4648 | __kfree_skb(skb); | ||
4649 | skb = NULL; | 4666 | skb = NULL; |
4650 | } | 4667 | } |
4651 | 4668 | ||
@@ -4804,12 +4821,9 @@ queue_and_out: | |||
4804 | 4821 | ||
4805 | tcp_fast_path_check(sk); | 4822 | tcp_fast_path_check(sk); |
4806 | 4823 | ||
4807 | if (eaten > 0) { | 4824 | if (eaten > 0) |
4808 | if (fragstolen) | 4825 | kfree_skb_partial(skb, fragstolen); |
4809 | kmem_cache_free(skbuff_head_cache, skb); | 4826 | else if (!sock_flag(sk, SOCK_DEAD)) |
4810 | else | ||
4811 | __kfree_skb(skb); | ||
4812 | } else if (!sock_flag(sk, SOCK_DEAD)) | ||
4813 | sk->sk_data_ready(sk, 0); | 4827 | sk->sk_data_ready(sk, 0); |
4814 | return; | 4828 | return; |
4815 | } | 4829 | } |