diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-09-21 04:47:45 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-09-21 18:05:50 -0400 |
commit | 3d13008e7345fa7a79d8f6438150dc15d6ba6e9d (patch) | |
tree | a36caa5636e4030419ca8907642e75c0cd565143 /net/ipv4/ip_output.c | |
parent | 7e96dc7045bff8758804b047c0dfb6868f182500 (diff) |
ip: fix truesize mismatch in ip fragmentation
Special care should be taken when slow path is hit in ip_fragment() :
When walking through frags, we transfert truesize ownership from skb to
frags. Then if we hit a slow_path condition, we must undo this or risk
uncharging frags->truesize twice, and in the end, having negative socket
sk_wmem_alloc counter, or even freeing socket sooner than expected.
Many thanks to Nick Bowler, who provided a very clean bug report and
test program.
Thanks to Jarek for reviewing my first patch and providing a V2
While Nick bisection pointed to commit 2b85a34e911 (net: No more
expensive sock_hold()/sock_put() on each tx), underlying bug is older
(2.6.12-rc5)
A side effect is to extend work done in commit b2722b1c3a893e
(ip_fragment: also adjust skb->truesize for packets not owned by a
socket) to ipv6 as well.
Reported-and-bisected-by: Nick Bowler <nbowler@elliptictech.com>
Tested-by: Nick Bowler <nbowler@elliptictech.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/ip_output.c')
-rw-r--r-- | net/ipv4/ip_output.c | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b69896df5f..7649d7750075 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -488,9 +488,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
488 | * we can switch to copy when see the first bad fragment. | 488 | * we can switch to copy when see the first bad fragment. |
489 | */ | 489 | */ |
490 | if (skb_has_frags(skb)) { | 490 | if (skb_has_frags(skb)) { |
491 | struct sk_buff *frag; | 491 | struct sk_buff *frag, *frag2; |
492 | int first_len = skb_pagelen(skb); | 492 | int first_len = skb_pagelen(skb); |
493 | int truesizes = 0; | ||
494 | 493 | ||
495 | if (first_len - hlen > mtu || | 494 | if (first_len - hlen > mtu || |
496 | ((first_len - hlen) & 7) || | 495 | ((first_len - hlen) & 7) || |
@@ -503,18 +502,18 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
503 | if (frag->len > mtu || | 502 | if (frag->len > mtu || |
504 | ((frag->len & 7) && frag->next) || | 503 | ((frag->len & 7) && frag->next) || |
505 | skb_headroom(frag) < hlen) | 504 | skb_headroom(frag) < hlen) |
506 | goto slow_path; | 505 | goto slow_path_clean; |
507 | 506 | ||
508 | /* Partially cloned skb? */ | 507 | /* Partially cloned skb? */ |
509 | if (skb_shared(frag)) | 508 | if (skb_shared(frag)) |
510 | goto slow_path; | 509 | goto slow_path_clean; |
511 | 510 | ||
512 | BUG_ON(frag->sk); | 511 | BUG_ON(frag->sk); |
513 | if (skb->sk) { | 512 | if (skb->sk) { |
514 | frag->sk = skb->sk; | 513 | frag->sk = skb->sk; |
515 | frag->destructor = sock_wfree; | 514 | frag->destructor = sock_wfree; |
516 | } | 515 | } |
517 | truesizes += frag->truesize; | 516 | skb->truesize -= frag->truesize; |
518 | } | 517 | } |
519 | 518 | ||
520 | /* Everything is OK. Generate! */ | 519 | /* Everything is OK. Generate! */ |
@@ -524,7 +523,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
524 | frag = skb_shinfo(skb)->frag_list; | 523 | frag = skb_shinfo(skb)->frag_list; |
525 | skb_frag_list_init(skb); | 524 | skb_frag_list_init(skb); |
526 | skb->data_len = first_len - skb_headlen(skb); | 525 | skb->data_len = first_len - skb_headlen(skb); |
527 | skb->truesize -= truesizes; | ||
528 | skb->len = first_len; | 526 | skb->len = first_len; |
529 | iph->tot_len = htons(first_len); | 527 | iph->tot_len = htons(first_len); |
530 | iph->frag_off = htons(IP_MF); | 528 | iph->frag_off = htons(IP_MF); |
@@ -576,6 +574,15 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
576 | } | 574 | } |
577 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 575 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
578 | return err; | 576 | return err; |
577 | |||
578 | slow_path_clean: | ||
579 | skb_walk_frags(skb, frag2) { | ||
580 | if (frag2 == frag) | ||
581 | break; | ||
582 | frag2->sk = NULL; | ||
583 | frag2->destructor = NULL; | ||
584 | skb->truesize += frag2->truesize; | ||
585 | } | ||
579 | } | 586 | } |
580 | 587 | ||
581 | slow_path: | 588 | slow_path: |