aboutsummaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2017-05-11 18:24:41 -0400
committerDavid S. Miller <davem@davemloft.net>2017-05-11 21:32:48 -0400
commitf6ba8d33cfbb46df569972e64dbb5bb7e929bfd9 (patch)
treea341e4360d881c74496da6f86d7127a9ffe65191 /net/core
parent4e3c60ed2f594e4ec6bb81d71ef6993e60f740d8 (diff)
netem: fix skb_orphan_partial()
I should have known that lowering skb->truesize was dangerous :/ In case packets are not leaving the host via a standard Ethernet device, but looped back to local sockets, bad things can happen, as reported by Michael Madsen ( https://bugzilla.kernel.org/show_bug.cgi?id=195713 ) So instead of tweaking skb->truesize, lets change skb->destructor and keep a reference on the owner socket via its sk_refcnt. Fixes: f2f872f9272a ("netem: Introduce skb_orphan_partial() helper") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Michael Madsen <mkm@nabto.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/sock.c20
1 files changed, 8 insertions, 12 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 79c6aee6af9b..e43e71d7856b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1803,28 +1803,24 @@ EXPORT_SYMBOL(skb_set_owner_w);
1803 * delay queue. We want to allow the owner socket to send more 1803 * delay queue. We want to allow the owner socket to send more
1804 * packets, as if they were already TX completed by a typical driver. 1804 * packets, as if they were already TX completed by a typical driver.
1805 * But we also want to keep skb->sk set because some packet schedulers 1805 * But we also want to keep skb->sk set because some packet schedulers
1806 * rely on it (sch_fq for example). So we set skb->truesize to a small 1806 * rely on it (sch_fq for example).
1807 * amount (1) and decrease sk_wmem_alloc accordingly.
1808 */ 1807 */
1809void skb_orphan_partial(struct sk_buff *skb) 1808void skb_orphan_partial(struct sk_buff *skb)
1810{ 1809{
1811 /* If this skb is a TCP pure ACK or already went here, 1810 if (skb_is_tcp_pure_ack(skb))
1812 * we have nothing to do. 2 is already a very small truesize.
1813 */
1814 if (skb->truesize <= 2)
1815 return; 1811 return;
1816 1812
1817 /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
1818 * so we do not completely orphan skb, but transfert all
1819 * accounted bytes but one, to avoid unexpected reorders.
1820 */
1821 if (skb->destructor == sock_wfree 1813 if (skb->destructor == sock_wfree
1822#ifdef CONFIG_INET 1814#ifdef CONFIG_INET
1823 || skb->destructor == tcp_wfree 1815 || skb->destructor == tcp_wfree
1824#endif 1816#endif
1825 ) { 1817 ) {
1826 atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc); 1818 struct sock *sk = skb->sk;
1827 skb->truesize = 1; 1819
1820 if (atomic_inc_not_zero(&sk->sk_refcnt)) {
1821 atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
1822 skb->destructor = sock_efree;
1823 }
1828 } else { 1824 } else {
1829 skb_orphan(skb); 1825 skb_orphan(skb);
1830 } 1826 }