aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Stefano Brivio <sbrivio@redhat.com> 2018-11-08 06:19:14 -0500
committer David S. Miller <davem@davemloft.net> 2018-11-08 20:13:08 -0500
commit a36e185e8c85523413c1ae3e03a0bdde5501f403 (patch)
tree 74340b5913e7d77870196c5d1644efb36fb698fe
parent 141b95d5519eafebfa3d8d3130a4f1cbb1eef622 (diff)
udp: Handle ICMP errors for tunnels with same destination port on both endpoints
For both IPv4 and IPv6, if we can't match errors to a socket, try tunnels before ignoring them. Look up a socket with the original source and destination ports as found in the UDP packet inside the ICMP payload, this will work for tunnels that force the same destination port for both endpoints, i.e. VXLAN and GENEVE.

Actually, lwtunnels could break this assumption if they are configured by an external control plane to have different destination ports on the endpoints: in this case, we won't be able to trace ICMP messages back to them.

For IPv6 redirect messages, call ip6_redirect() directly with the output interface argument set to the interface we received the packet from (as it's the very interface we should build the exception on), otherwise the new nexthop will be rejected. There's no such need for IPv4.

Tunnels can now export an encap_err_lookup() operation that indicates a match. Pass the packet to the lookup function, and if the tunnel driver reports a matching association, continue with regular ICMP error handling.

v2:
 - Added newline between network and transport header sets in __udp{4,6}_lib_err_encap() (David Miller)
 - Removed redundant skb_reset_network_header(skb); in __udp4_lib_err_encap()
 - Removed redundant reassignment of iph in __udp4_lib_err_encap() (Sabrina Dubroca)
 - Edited comment to __udp{4,6}_lib_err_encap() to reflect the fact this won't work with lwtunnels configured to use asymmetric ports. By the way, it's VXLAN, not VxLAN (Jiri Benc)

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/udp.h 1
-rw-r--r-- include/net/udp_tunnel.h 3
-rw-r--r-- net/ipv4/udp.c 79
-rw-r--r-- net/ipv4/udp_tunnel.c 1
-rw-r--r-- net/ipv6/udp.c 89
5 files changed, 153 insertions, 20 deletions
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0a9c54e76305..2725c83395bf 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -77,6 +77,7 @@ struct udp_sock {
77 * For encapsulation sockets. 77 * For encapsulation sockets.
78 */ 78 */
79 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); 79 int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
80 int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
80 void (*encap_destroy)(struct sock *sk); 81 void (*encap_destroy)(struct sock *sk);
81 82
82 /* GRO functions for UDP socket */ 83 /* GRO functions for UDP socket */
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 3fbe56430e3b..dc8d804af3b4 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -64,6 +64,8 @@ static inline int udp_sock_create(struct net *net,
64} 64}
65 65
66typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); 66typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
67typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk,
68 struct sk_buff *skb);
67typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); 69typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
68typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, 70typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
69 struct list_head *head, 71 struct list_head *head,
@@ -76,6 +78,7 @@ struct udp_tunnel_sock_cfg {
76 /* Used for setting up udp_sock fields, see udp.h for details */ 78 /* Used for setting up udp_sock fields, see udp.h for details */
77 __u8 encap_type; 79 __u8 encap_type;
78 udp_tunnel_encap_rcv_t encap_rcv; 80 udp_tunnel_encap_rcv_t encap_rcv;
81 udp_tunnel_encap_err_lookup_t encap_err_lookup;
79 udp_tunnel_encap_destroy_t encap_destroy; 82 udp_tunnel_encap_destroy_t encap_destroy;
80 udp_tunnel_gro_receive_t gro_receive; 83 udp_tunnel_gro_receive_t gro_receive;
81 udp_tunnel_gro_complete_t gro_complete; 84 udp_tunnel_gro_complete_t gro_complete;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3488650b90ac..ce759b61f6cd 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -583,6 +583,62 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
583 return true; 583 return true;
584} 584}
585 585
586DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
587void udp_encap_enable(void)
588{
589 static_branch_enable(&udp_encap_needed_key);
590}
591EXPORT_SYMBOL(udp_encap_enable);
592
593/* Try to match ICMP errors to UDP tunnels by looking up a socket without
594 * reversing source and destination port: this will match tunnels that force the
595 * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
596 * lwtunnels might actually break this assumption by being configured with
597 * different destination ports on endpoints, in this case we won't be able to
598 * trace ICMP messages back to them.
599 *
600 * Then ask the tunnel implementation to match the error against a valid
601 * association.
602 *
603 * Return the socket if we have a match.
604 */
605static struct sock *__udp4_lib_err_encap(struct net *net,
606 const struct iphdr *iph,
607 struct udphdr *uh,
608 struct udp_table *udptable,
609 struct sk_buff *skb)
610{
611 int (*lookup)(struct sock *sk, struct sk_buff *skb);
612 int network_offset, transport_offset;
613 struct udp_sock *up;
614 struct sock *sk;
615
616 sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
617 iph->saddr, uh->dest, skb->dev->ifindex, 0,
618 udptable, NULL);
619 if (!sk)
620 return NULL;
621
622 network_offset = skb_network_offset(skb);
623 transport_offset = skb_transport_offset(skb);
624
625 /* Network header needs to point to the outer IPv4 header inside ICMP */
626 skb_reset_network_header(skb);
627
628 /* Transport header needs to point to the UDP header */
629 skb_set_transport_header(skb, iph->ihl << 2);
630
631 up = udp_sk(sk);
632 lookup = READ_ONCE(up->encap_err_lookup);
633 if (!lookup || lookup(sk, skb))
634 sk = NULL;
635
636 skb_set_transport_header(skb, transport_offset);
637 skb_set_network_header(skb, network_offset);
638
639 return sk;
640}
641
586/* 642/*
587 * This routine is called by the ICMP module when it gets some 643 * This routine is called by the ICMP module when it gets some
588 * sort of error condition. If err < 0 then the socket should 644 * sort of error condition. If err < 0 then the socket should
@@ -601,6 +657,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
601 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); 657 struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
602 const int type = icmp_hdr(skb)->type; 658 const int type = icmp_hdr(skb)->type;
603 const int code = icmp_hdr(skb)->code; 659 const int code = icmp_hdr(skb)->code;
660 bool tunnel = false;
604 struct sock *sk; 661 struct sock *sk;
605 int harderr; 662 int harderr;
606 int err; 663 int err;
@@ -610,8 +667,15 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
610 iph->saddr, uh->source, skb->dev->ifindex, 667 iph->saddr, uh->source, skb->dev->ifindex,
611 inet_sdif(skb), udptable, NULL); 668 inet_sdif(skb), udptable, NULL);
612 if (!sk) { 669 if (!sk) {
613 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); 670 /* No socket for error: try tunnels before discarding */
614 return; /* No socket for error */ 671 if (static_branch_unlikely(&udp_encap_needed_key))
672 sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb);
673
674 if (!sk) {
675 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
676 return;
677 }
678 tunnel = true;
615 } 679 }
616 680
617 err = 0; 681 err = 0;
@@ -654,6 +718,10 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
654 * RFC1122: OK. Passes ICMP errors back to application, as per 718 * RFC1122: OK. Passes ICMP errors back to application, as per
655 * 4.1.3.3. 719 * 4.1.3.3.
656 */ 720 */
721 if (tunnel) {
722 /* ...not for tunnels though: we don't have a sending socket */
723 goto out;
724 }
657 if (!inet->recverr) { 725 if (!inet->recverr) {
658 if (!harderr || sk->sk_state != TCP_ESTABLISHED) 726 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
659 goto out; 727 goto out;
@@ -1891,13 +1959,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
1891 return 0; 1959 return 0;
1892} 1960}
1893 1961
1894DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
1895void udp_encap_enable(void)
1896{
1897 static_branch_enable(&udp_encap_needed_key);
1898}
1899EXPORT_SYMBOL(udp_encap_enable);
1900
1901/* returns: 1962/* returns:
1902 * -1: error 1963 * -1: error
1903 * 0: success 1964 * 0: success
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6539ff15e9a3..d0c412fc56ad 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -68,6 +68,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
68 68
69 udp_sk(sk)->encap_type = cfg->encap_type; 69 udp_sk(sk)->encap_type = cfg->encap_type;
70 udp_sk(sk)->encap_rcv = cfg->encap_rcv; 70 udp_sk(sk)->encap_rcv = cfg->encap_rcv;
71 udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
71 udp_sk(sk)->encap_destroy = cfg->encap_destroy; 72 udp_sk(sk)->encap_destroy = cfg->encap_destroy;
72 udp_sk(sk)->gro_receive = cfg->gro_receive; 73 udp_sk(sk)->gro_receive = cfg->gro_receive;
73 udp_sk(sk)->gro_complete = cfg->gro_complete; 74 udp_sk(sk)->gro_complete = cfg->gro_complete;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c55698d19d68..1216c920f945 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -462,6 +462,61 @@ csum_copy_err:
462 goto try_again; 462 goto try_again;
463} 463}
464 464
465DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
466void udpv6_encap_enable(void)
467{
468 static_branch_enable(&udpv6_encap_needed_key);
469}
470EXPORT_SYMBOL(udpv6_encap_enable);
471
472/* Try to match ICMP errors to UDP tunnels by looking up a socket without
473 * reversing source and destination port: this will match tunnels that force the
474 * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
475 * lwtunnels might actually break this assumption by being configured with
476 * different destination ports on endpoints, in this case we won't be able to
477 * trace ICMP messages back to them.
478 *
479 * Then ask the tunnel implementation to match the error against a valid
480 * association.
481 *
482 * Return the socket if we have a match.
483 */
484static struct sock *__udp6_lib_err_encap(struct net *net,
485 const struct ipv6hdr *hdr, int offset,
486 struct udphdr *uh,
487 struct udp_table *udptable,
488 struct sk_buff *skb)
489{
490 int (*lookup)(struct sock *sk, struct sk_buff *skb);
491 int network_offset, transport_offset;
492 struct udp_sock *up;
493 struct sock *sk;
494
495 sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
496 &hdr->saddr, uh->dest,
497 inet6_iif(skb), 0, udptable, skb);
498 if (!sk)
499 return NULL;
500
501 network_offset = skb_network_offset(skb);
502 transport_offset = skb_transport_offset(skb);
503
504 /* Network header needs to point to the outer IPv6 header inside ICMP */
505 skb_reset_network_header(skb);
506
507 /* Transport header needs to point to the UDP header */
508 skb_set_transport_header(skb, offset);
509
510 up = udp_sk(sk);
511 lookup = READ_ONCE(up->encap_err_lookup);
512 if (!lookup || lookup(sk, skb))
513 sk = NULL;
514
515 skb_set_transport_header(skb, transport_offset);
516 skb_set_network_header(skb, network_offset);
517 return sk;
518}
519
465void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 520void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
466 u8 type, u8 code, int offset, __be32 info, 521 u8 type, u8 code, int offset, __be32 info,
467 struct udp_table *udptable) 522 struct udp_table *udptable)
@@ -471,6 +526,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
471 const struct in6_addr *saddr = &hdr->saddr; 526 const struct in6_addr *saddr = &hdr->saddr;
472 const struct in6_addr *daddr = &hdr->daddr; 527 const struct in6_addr *daddr = &hdr->daddr;
473 struct udphdr *uh = (struct udphdr *)(skb->data+offset); 528 struct udphdr *uh = (struct udphdr *)(skb->data+offset);
529 bool tunnel = false;
474 struct sock *sk; 530 struct sock *sk;
475 int harderr; 531 int harderr;
476 int err; 532 int err;
@@ -479,9 +535,18 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
479 sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, 535 sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
480 inet6_iif(skb), inet6_sdif(skb), udptable, skb); 536 inet6_iif(skb), inet6_sdif(skb), udptable, skb);
481 if (!sk) { 537 if (!sk) {
482 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), 538 /* No socket for error: try tunnels before discarding */
483 ICMP6_MIB_INERRORS); 539 if (static_branch_unlikely(&udpv6_encap_needed_key)) {
484 return; 540 sk = __udp6_lib_err_encap(net, hdr, offset, uh,
541 udptable, skb);
542 }
543
544 if (!sk) {
545 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
546 ICMP6_MIB_INERRORS);
547 return;
548 }
549 tunnel = true;
485 } 550 }
486 551
487 harderr = icmpv6_err_convert(type, code, &err); 552 harderr = icmpv6_err_convert(type, code, &err);
@@ -495,10 +560,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
495 harderr = 1; 560 harderr = 1;
496 } 561 }
497 if (type == NDISC_REDIRECT) { 562 if (type == NDISC_REDIRECT) {
498 ip6_sk_redirect(skb, sk); 563 if (tunnel) {
564 ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
565 sk->sk_mark, sk->sk_uid);
566 } else {
567 ip6_sk_redirect(skb, sk);
568 }
499 goto out; 569 goto out;
500 } 570 }
501 571
572 /* Tunnels don't have an application socket: don't pass errors back */
573 if (tunnel)
574 goto out;
575
502 if (!np->recverr) { 576 if (!np->recverr) {
503 if (!harderr || sk->sk_state != TCP_ESTABLISHED) 577 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
504 goto out; 578 goto out;
@@ -547,13 +621,6 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
547 __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); 621 __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
548} 622}
549 623
550DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
551void udpv6_encap_enable(void)
552{
553 static_branch_enable(&udpv6_encap_needed_key);
554}
555EXPORT_SYMBOL(udpv6_encap_enable);
556
557static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) 624static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
558{ 625{
559 struct udp_sock *up = udp_sk(sk); 626 struct udp_sock *up = udp_sk(sk);