diff options
author | Eric Dumazet <edumazet@google.com> | 2017-05-11 18:24:41 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-05-11 21:32:48 -0400 |
commit | f6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 (patch) | |
tree | a341e4360d881c74496da6f86d7127a9ffe65191 /net/core | |
parent | 4e3c60ed2f594e4ec6bb81d71ef6993e60f740d8 (diff) |
netem: fix skb_orphan_partial()
I should have known that lowering skb->truesize was dangerous :/
In case packets are not leaving the host via a standard Ethernet device,
but looped back to local sockets, bad things can happen, as reported
by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 )
So instead of tweaking skb->truesize, lets change skb->destructor
and keep a reference on the owner socket via its sk_refcnt.
Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Michael Madsen <mkm@nabto.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/sock.c | 20 |
1 files changed, 8 insertions, 12 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 79c6aee6af9b..e43e71d7856b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -1803,28 +1803,24 @@ EXPORT_SYMBOL(skb_set_owner_w); | |||
1803 | * delay queue. We want to allow the owner socket to send more | 1803 | * delay queue. We want to allow the owner socket to send more |
1804 | * packets, as if they were already TX completed by a typical driver. | 1804 | * packets, as if they were already TX completed by a typical driver. |
1805 | * But we also want to keep skb->sk set because some packet schedulers | 1805 | * But we also want to keep skb->sk set because some packet schedulers |
1806 | * rely on it (sch_fq for example). So we set skb->truesize to a small | 1806 | * rely on it (sch_fq for example). |
1807 | * amount (1) and decrease sk_wmem_alloc accordingly. | ||
1808 | */ | 1807 | */ |
1809 | void skb_orphan_partial(struct sk_buff *skb) | 1808 | void skb_orphan_partial(struct sk_buff *skb) |
1810 | { | 1809 | { |
1811 | /* If this skb is a TCP pure ACK or already went here, | 1810 | if (skb_is_tcp_pure_ack(skb)) |
1812 | * we have nothing to do. 2 is already a very small truesize. | ||
1813 | */ | ||
1814 | if (skb->truesize <= 2) | ||
1815 | return; | 1811 | return; |
1816 | 1812 | ||
1817 | /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc, | ||
1818 | * so we do not completely orphan skb, but transfert all | ||
1819 | * accounted bytes but one, to avoid unexpected reorders. | ||
1820 | */ | ||
1821 | if (skb->destructor == sock_wfree | 1813 | if (skb->destructor == sock_wfree |
1822 | #ifdef CONFIG_INET | 1814 | #ifdef CONFIG_INET |
1823 | || skb->destructor == tcp_wfree | 1815 | || skb->destructor == tcp_wfree |
1824 | #endif | 1816 | #endif |
1825 | ) { | 1817 | ) { |
1826 | atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); | 1818 | struct sock *sk = skb->sk; |
1827 | skb->truesize = 1; | 1819 | |
1820 | if (atomic_inc_not_zero(&sk->sk_refcnt)) { | ||
1821 | atomic_sub(skb->truesize, &sk->sk_wmem_alloc); | ||
1822 | skb->destructor = sock_efree; | ||
1823 | } | ||
1828 | } else { | 1824 | } else { |
1829 | skb_orphan(skb); | 1825 | skb_orphan(skb); |
1830 | } | 1826 | } |