diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-09-21 04:47:45 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-09-21 18:05:50 -0400 |
commit | 3d13008e7345fa7a79d8f6438150dc15d6ba6e9d (patch) | |
tree | a36caa5636e4030419ca8907642e75c0cd565143 | |
parent | 7e96dc7045bff8758804b047c0dfb6868f182500 (diff) |
ip: fix truesize mismatch in ip fragmentation
Special care should be taken when slow path is hit in ip_fragment() :
When walking through frags, we transfert truesize ownership from skb to
frags. Then if we hit a slow_path condition, we must undo this or risk
uncharging frags->truesize twice, and in the end, having negative socket
sk_wmem_alloc counter, or even freeing socket sooner than expected.
Many thanks to Nick Bowler, who provided a very clean bug report and
test program.
Thanks to Jarek for reviewing my first patch and providing a V2
While Nick bisection pointed to commit 2b85a34e911 (net: No more
expensive sock_hold()/sock_put() on each tx), underlying bug is older
(2.6.12-rc5)
A side effect is to extend work done in commit b2722b1c3a893e
(ip_fragment: also adjust skb->truesize for packets not owned by a
socket) to ipv6 as well.
Reported-and-bisected-by: Nick Bowler <nbowler@elliptictech.com>
Tested-by: Nick Bowler <nbowler@elliptictech.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/ip_output.c | 19 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 18 |
2 files changed, 26 insertions, 11 deletions
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b69896df5f..7649d7750075 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -488,9 +488,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
488 | * we can switch to copy when see the first bad fragment. | 488 | * we can switch to copy when see the first bad fragment. |
489 | */ | 489 | */ |
490 | if (skb_has_frags(skb)) { | 490 | if (skb_has_frags(skb)) { |
491 | struct sk_buff *frag; | 491 | struct sk_buff *frag, *frag2; |
492 | int first_len = skb_pagelen(skb); | 492 | int first_len = skb_pagelen(skb); |
493 | int truesizes = 0; | ||
494 | 493 | ||
495 | if (first_len - hlen > mtu || | 494 | if (first_len - hlen > mtu || |
496 | ((first_len - hlen) & 7) || | 495 | ((first_len - hlen) & 7) || |
@@ -503,18 +502,18 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
503 | if (frag->len > mtu || | 502 | if (frag->len > mtu || |
504 | ((frag->len & 7) && frag->next) || | 503 | ((frag->len & 7) && frag->next) || |
505 | skb_headroom(frag) < hlen) | 504 | skb_headroom(frag) < hlen) |
506 | goto slow_path; | 505 | goto slow_path_clean; |
507 | 506 | ||
508 | /* Partially cloned skb? */ | 507 | /* Partially cloned skb? */ |
509 | if (skb_shared(frag)) | 508 | if (skb_shared(frag)) |
510 | goto slow_path; | 509 | goto slow_path_clean; |
511 | 510 | ||
512 | BUG_ON(frag->sk); | 511 | BUG_ON(frag->sk); |
513 | if (skb->sk) { | 512 | if (skb->sk) { |
514 | frag->sk = skb->sk; | 513 | frag->sk = skb->sk; |
515 | frag->destructor = sock_wfree; | 514 | frag->destructor = sock_wfree; |
516 | } | 515 | } |
517 | truesizes += frag->truesize; | 516 | skb->truesize -= frag->truesize; |
518 | } | 517 | } |
519 | 518 | ||
520 | /* Everything is OK. Generate! */ | 519 | /* Everything is OK. Generate! */ |
@@ -524,7 +523,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
524 | frag = skb_shinfo(skb)->frag_list; | 523 | frag = skb_shinfo(skb)->frag_list; |
525 | skb_frag_list_init(skb); | 524 | skb_frag_list_init(skb); |
526 | skb->data_len = first_len - skb_headlen(skb); | 525 | skb->data_len = first_len - skb_headlen(skb); |
527 | skb->truesize -= truesizes; | ||
528 | skb->len = first_len; | 526 | skb->len = first_len; |
529 | iph->tot_len = htons(first_len); | 527 | iph->tot_len = htons(first_len); |
530 | iph->frag_off = htons(IP_MF); | 528 | iph->frag_off = htons(IP_MF); |
@@ -576,6 +574,15 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
576 | } | 574 | } |
577 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); | 575 | IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); |
578 | return err; | 576 | return err; |
577 | |||
578 | slow_path_clean: | ||
579 | skb_walk_frags(skb, frag2) { | ||
580 | if (frag2 == frag) | ||
581 | break; | ||
582 | frag2->sk = NULL; | ||
583 | frag2->destructor = NULL; | ||
584 | skb->truesize += frag2->truesize; | ||
585 | } | ||
579 | } | 586 | } |
580 | 587 | ||
581 | slow_path: | 588 | slow_path: |
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d40b330c0ee6..980912ed7a38 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c | |||
@@ -639,7 +639,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
639 | 639 | ||
640 | if (skb_has_frags(skb)) { | 640 | if (skb_has_frags(skb)) { |
641 | int first_len = skb_pagelen(skb); | 641 | int first_len = skb_pagelen(skb); |
642 | int truesizes = 0; | 642 | struct sk_buff *frag2; |
643 | 643 | ||
644 | if (first_len - hlen > mtu || | 644 | if (first_len - hlen > mtu || |
645 | ((first_len - hlen) & 7) || | 645 | ((first_len - hlen) & 7) || |
@@ -651,18 +651,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
651 | if (frag->len > mtu || | 651 | if (frag->len > mtu || |
652 | ((frag->len & 7) && frag->next) || | 652 | ((frag->len & 7) && frag->next) || |
653 | skb_headroom(frag) < hlen) | 653 | skb_headroom(frag) < hlen) |
654 | goto slow_path; | 654 | goto slow_path_clean; |
655 | 655 | ||
656 | /* Partially cloned skb? */ | 656 | /* Partially cloned skb? */ |
657 | if (skb_shared(frag)) | 657 | if (skb_shared(frag)) |
658 | goto slow_path; | 658 | goto slow_path_clean; |
659 | 659 | ||
660 | BUG_ON(frag->sk); | 660 | BUG_ON(frag->sk); |
661 | if (skb->sk) { | 661 | if (skb->sk) { |
662 | frag->sk = skb->sk; | 662 | frag->sk = skb->sk; |
663 | frag->destructor = sock_wfree; | 663 | frag->destructor = sock_wfree; |
664 | truesizes += frag->truesize; | ||
665 | } | 664 | } |
665 | skb->truesize -= frag->truesize; | ||
666 | } | 666 | } |
667 | 667 | ||
668 | err = 0; | 668 | err = 0; |
@@ -693,7 +693,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
693 | 693 | ||
694 | first_len = skb_pagelen(skb); | 694 | first_len = skb_pagelen(skb); |
695 | skb->data_len = first_len - skb_headlen(skb); | 695 | skb->data_len = first_len - skb_headlen(skb); |
696 | skb->truesize -= truesizes; | ||
697 | skb->len = first_len; | 696 | skb->len = first_len; |
698 | ipv6_hdr(skb)->payload_len = htons(first_len - | 697 | ipv6_hdr(skb)->payload_len = htons(first_len - |
699 | sizeof(struct ipv6hdr)); | 698 | sizeof(struct ipv6hdr)); |
@@ -756,6 +755,15 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) | |||
756 | IPSTATS_MIB_FRAGFAILS); | 755 | IPSTATS_MIB_FRAGFAILS); |
757 | dst_release(&rt->dst); | 756 | dst_release(&rt->dst); |
758 | return err; | 757 | return err; |
758 | |||
759 | slow_path_clean: | ||
760 | skb_walk_frags(skb, frag2) { | ||
761 | if (frag2 == frag) | ||
762 | break; | ||
763 | frag2->sk = NULL; | ||
764 | frag2->destructor = NULL; | ||
765 | skb->truesize += frag2->truesize; | ||
766 | } | ||
759 | } | 767 | } |
760 | 768 | ||
761 | slow_path: | 769 | slow_path: |