From 8afdd99a1315e759de04ad6e2344f0c5f17ecb1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Dec 2013 18:07:23 -0800 Subject: udp: ipv4: fix an use after free in __udp4_lib_rcv() Dave Jones reported a use after free in UDP stack : [ 5059.434216] ========================= [ 5059.434314] [ BUG: held lock freed! ] [ 5059.434420] 3.13.0-rc3+ #9 Not tainted [ 5059.434520] ------------------------- [ 5059.434620] named/863 is freeing memory ffff88005e960000-ffff88005e96061f, with a lock still held there! [ 5059.434815] (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435012] 3 locks held by named/863: [ 5059.435086] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb_core+0x11d/0x940 [ 5059.435295] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x3e/0x410 [ 5059.435500] #2: (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435734] stack backtrace: [ 5059.435858] CPU: 0 PID: 863 Comm: named Not tainted 3.13.0-rc3+ #9 [loadavg: 0.21 0.06 0.06 1/115 1365] [ 5059.436052] Hardware name: /D510MO, BIOS MOPNV10J.86A.0175.2010.0308.0620 03/08/2010 [ 5059.436223] 0000000000000002 ffff88007e203ad8 ffffffff8153a372 ffff8800677130e0 [ 5059.436390] ffff88007e203b10 ffffffff8108cafa ffff88005e960000 ffff88007b00cfc0 [ 5059.436554] ffffea00017a5800 ffffffff8141c490 0000000000000246 ffff88007e203b48 [ 5059.436718] Call Trace: [ 5059.436769] [] dump_stack+0x4d/0x66 [ 5059.436904] [] debug_check_no_locks_freed+0x15a/0x160 [ 5059.437037] [] ? __sk_free+0x110/0x230 [ 5059.437147] [] kmem_cache_free+0x6a/0x150 [ 5059.437260] [] __sk_free+0x110/0x230 [ 5059.437364] [] sk_free+0x19/0x20 [ 5059.437463] [] sock_edemux+0x25/0x40 [ 5059.437567] [] sock_queue_rcv_skb+0x81/0x280 [ 5059.437685] [] ? udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.437805] [] __udp_queue_rcv_skb+0x42/0x240 [ 5059.437925] [] ? _raw_spin_lock+0x65/0x70 [ 5059.438038] [] udp_queue_rcv_skb+0x26b/0x4b0 [ 5059.438155] [] __udp4_lib_rcv+0x152/0xb00 [ 5059.438269] [] udp_rcv+0x15/0x20 [ 5059.438367] [] ip_local_deliver_finish+0x10f/0x410 [ 5059.438492] [] ? ip_local_deliver_finish+0x3e/0x410 [ 5059.438621] [] ip_local_deliver+0x43/0x80 [ 5059.438733] [] ip_rcv_finish+0x140/0x5a0 [ 5059.438843] [] ip_rcv+0x296/0x3f0 [ 5059.438945] [] __netif_receive_skb_core+0x742/0x940 [ 5059.439074] [] ? __netif_receive_skb_core+0x11d/0x940 [ 5059.442231] [] ? trace_hardirqs_on+0xd/0x10 [ 5059.442231] [] __netif_receive_skb+0x13/0x60 [ 5059.442231] [] netif_receive_skb+0x1e/0x1f0 [ 5059.442231] [] napi_gro_receive+0x70/0xa0 [ 5059.442231] [] rtl8169_poll+0x166/0x700 [r8169] [ 5059.442231] [] net_rx_action+0x129/0x1e0 [ 5059.442231] [] __do_softirq+0xed/0x240 [ 5059.442231] [] irq_exit+0x125/0x140 [ 5059.442231] [] do_IRQ+0x51/0xc0 [ 5059.442231] [] common_interrupt+0x6f/0x6f We need to keep a reference on the socket, by using skb_steal_sock() at the right place. Note that another patch is needed to fix a race in udp_sk_rx_dst_set(), as we hold no lock protecting the dst. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44f6a20fa29d..2e2aecbe22c4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -560,15 +560,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { - struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) - return sk; - else - return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - udptable); + return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, @@ -1739,15 +1735,15 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - if (skb->sk) { + sk = skb_steal_sock(skb); + if (sk) { int ret; - sk = skb->sk; if (unlikely(sk->sk_rx_dst == NULL)) udp_sk_rx_dst_set(sk, skb); ret = udp_queue_rcv_skb(sk, skb); - + sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ -- cgit v1.2.2 From 610438b74496b2986a9025f8e23c134cb638e338 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 08:10:05 -0800 Subject: udp: ipv4: fix potential use after free in udp_v4_early_demux() pskb_may_pull() can reallocate skb->head, we need to move the initialization of iph and uh pointers after its call. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2e2aecbe22c4..16d246a51a02 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1909,17 +1909,20 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, void udp_v4_early_demux(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); - const struct udphdr *uh = udp_hdr(skb); + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; + const struct udphdr *uh; struct sock *sk; struct dst_entry *dst; - struct net *net = dev_net(skb->dev); int dif = skb->dev->ifindex; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, -- cgit v1.2.2 From 975022310233fb0f0193873d79a7b8438070fa82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 14:46:51 -0800 Subject: udp: ipv4: must add synchronization in udp_sk_rx_dst_set() Unlike TCP, UDP input path does not hold the socket lock. Before messing with sk->sk_rx_dst, we must use a spinlock, otherwise multiple cpus could leak a refcount. This patch also takes care of renewing a stale dst entry. (When the sk->sk_rx_dst would not be used by IP early demux) Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 16d246a51a02..62c19fdd102d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1599,12 +1599,21 @@ static void flush_stack(struct sock **stack, unsigned int count, kfree_skb(skb1); } -static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +/* For TCP sockets, sk_rx_dst is protected by socket lock + * For UDP, we use sk_dst_lock to guard against concurrent changes. + */ +static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *old; - dst_hold(dst); - sk->sk_rx_dst = dst; + spin_lock(&sk->sk_dst_lock); + old = sk->sk_rx_dst; + if (likely(old != dst)) { + dst_hold(dst); + sk->sk_rx_dst = dst; + dst_release(old); + } + spin_unlock(&sk->sk_dst_lock); } /* @@ -1737,10 +1746,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = skb_steal_sock(skb); if (sk) { + struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst == NULL)) - udp_sk_rx_dst_set(sk, skb); + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); -- cgit v1.2.2 From e47eb5dfb296bf217e9ebee7b2f07486670b9c1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Dec 2013 10:53:46 -0800 Subject: udp: ipv4: do not use sk_dst_lock from softirq context Using sk_dst_lock from softirq context is not supported right now. Instead of adding BH protection everywhere, udp_sk_rx_dst_set() can instead use xchg(), as suggested by David. Reported-by: Fengguang Wu Fixes: 975022310233 ("udp: ipv4: must add synchronization in udp_sk_rx_dst_set()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 62c19fdd102d..f140048334ce 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1600,20 +1600,15 @@ static void flush_stack(struct sock **stack, unsigned int count, } /* For TCP sockets, sk_rx_dst is protected by socket lock - * For UDP, we use sk_dst_lock to guard against concurrent changes. + * For UDP, we use xchg() to guard against concurrent changes. */ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; - spin_lock(&sk->sk_dst_lock); - old = sk->sk_rx_dst; - if (likely(old != dst)) { - dst_hold(dst); - sk->sk_rx_dst = dst; - dst_release(old); - } - spin_unlock(&sk->sk_dst_lock); + dst_hold(dst); + old = xchg(&sk->sk_rx_dst, dst); + dst_release(old); } /* -- cgit v1.2.2 From 7a7ffbabf99445704be01bff5d7e360da908cf8e Mon Sep 17 00:00:00 2001 From: Wei-Chun Chao Date: Thu, 26 Dec 2013 13:10:22 -0800 Subject: ipv4: fix tunneled VM traffic over hw VXLAN/GRE GSO NIC VM to VM GSO traffic is broken if it goes through VXLAN or GRE tunnel and the physical NIC on the host supports hardware VXLAN/GRE GSO offload (e.g. bnx2x and next-gen mlx4). Two issues - (VXLAN) VM traffic has SKB_GSO_DODGY and SKB_GSO_UDP_TUNNEL with SKB_GSO_TCP/UDP set depending on the inner protocol. GSO header integrity check fails in udp4_ufo_fragment if inner protocol is TCP. Also gso_segs is calculated incorrectly using skb->len that includes tunnel header. Fix: robust check should only be applied to the inner packet. (VXLAN & GRE) Once GSO header integrity check passes, NULL segs is returned and the original skb is sent to hardware. However the tunnel header is already pulled. Fix: tunnel header needs to be restored so that hardware can perform GSO properly on the original packet. Signed-off-by: Wei-Chun Chao Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv4/udp.c') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f140048334ce..a7e4729e974b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2478,6 +2478,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; int mac_len = skb->mac_len; int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); __be16 protocol = skb->protocol; @@ -2497,8 +2498,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, /* segment inner packet. */ enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) + if (!segs || IS_ERR(segs)) { + skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset, + mac_len); goto out; + } outer_hlen = skb_tnl_header_len(skb); skb = segs; -- cgit v1.2.2