diff options
-rw-r--r-- | include/linux/udp.h | 1 | ||||
-rw-r--r-- | include/net/udp_tunnel.h | 3 | ||||
-rw-r--r-- | net/ipv4/udp.c | 79 | ||||
-rw-r--r-- | net/ipv4/udp_tunnel.c | 1 | ||||
-rw-r--r-- | net/ipv6/udp.c | 89 |
5 files changed, 153 insertions, 20 deletions
diff --git a/include/linux/udp.h b/include/linux/udp.h index 0a9c54e76305..2725c83395bf 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h | |||
@@ -77,6 +77,7 @@ struct udp_sock { | |||
77 | * For encapsulation sockets. | 77 | * For encapsulation sockets. |
78 | */ | 78 | */ |
79 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); | 79 | int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); |
80 | int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); | ||
80 | void (*encap_destroy)(struct sock *sk); | 81 | void (*encap_destroy)(struct sock *sk); |
81 | 82 | ||
82 | /* GRO functions for UDP socket */ | 83 | /* GRO functions for UDP socket */ |
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 3fbe56430e3b..dc8d804af3b4 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h | |||
@@ -64,6 +64,8 @@ static inline int udp_sock_create(struct net *net, | |||
64 | } | 64 | } |
65 | 65 | ||
66 | typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); | 66 | typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); |
67 | typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, | ||
68 | struct sk_buff *skb); | ||
67 | typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); | 69 | typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); |
68 | typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, | 70 | typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, |
69 | struct list_head *head, | 71 | struct list_head *head, |
@@ -76,6 +78,7 @@ struct udp_tunnel_sock_cfg { | |||
76 | /* Used for setting up udp_sock fields, see udp.h for details */ | 78 | /* Used for setting up udp_sock fields, see udp.h for details */ |
77 | __u8 encap_type; | 79 | __u8 encap_type; |
78 | udp_tunnel_encap_rcv_t encap_rcv; | 80 | udp_tunnel_encap_rcv_t encap_rcv; |
81 | udp_tunnel_encap_err_lookup_t encap_err_lookup; | ||
79 | udp_tunnel_encap_destroy_t encap_destroy; | 82 | udp_tunnel_encap_destroy_t encap_destroy; |
80 | udp_tunnel_gro_receive_t gro_receive; | 83 | udp_tunnel_gro_receive_t gro_receive; |
81 | udp_tunnel_gro_complete_t gro_complete; | 84 | udp_tunnel_gro_complete_t gro_complete; |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3488650b90ac..ce759b61f6cd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -583,6 +583,62 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, | |||
583 | return true; | 583 | return true; |
584 | } | 584 | } |
585 | 585 | ||
586 | DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); | ||
587 | void udp_encap_enable(void) | ||
588 | { | ||
589 | static_branch_enable(&udp_encap_needed_key); | ||
590 | } | ||
591 | EXPORT_SYMBOL(udp_encap_enable); | ||
592 | |||
593 | /* Try to match ICMP errors to UDP tunnels by looking up a socket without | ||
594 | * reversing source and destination port: this will match tunnels that force the | ||
595 | * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that | ||
596 | * lwtunnels might actually break this assumption by being configured with | ||
597 | * different destination ports on endpoints; in this case we won't be able to | ||
598 | * trace ICMP messages back to them. | ||
599 | * | ||
600 | * Then ask the tunnel implementation to match the error against a valid | ||
601 | * association. | ||
602 | * | ||
603 | * Return the socket if we have a match. | ||
604 | */ | ||
605 | static struct sock *__udp4_lib_err_encap(struct net *net, | ||
606 | const struct iphdr *iph, | ||
607 | struct udphdr *uh, | ||
608 | struct udp_table *udptable, | ||
609 | struct sk_buff *skb) | ||
610 | { | ||
611 | int (*lookup)(struct sock *sk, struct sk_buff *skb); | ||
612 | int network_offset, transport_offset; | ||
613 | struct udp_sock *up; | ||
614 | struct sock *sk; | ||
615 | |||
616 | sk = __udp4_lib_lookup(net, iph->daddr, uh->source, | ||
617 | iph->saddr, uh->dest, skb->dev->ifindex, 0, | ||
618 | udptable, NULL); | ||
619 | if (!sk) | ||
620 | return NULL; | ||
621 | |||
622 | network_offset = skb_network_offset(skb); | ||
623 | transport_offset = skb_transport_offset(skb); | ||
624 | |||
625 | /* Network header needs to point to the outer IPv4 header inside ICMP */ | ||
626 | skb_reset_network_header(skb); | ||
627 | |||
628 | /* Transport header needs to point to the UDP header */ | ||
629 | skb_set_transport_header(skb, iph->ihl << 2); | ||
630 | |||
631 | up = udp_sk(sk); | ||
632 | lookup = READ_ONCE(up->encap_err_lookup); | ||
633 | if (!lookup || lookup(sk, skb)) | ||
634 | sk = NULL; | ||
635 | |||
636 | skb_set_transport_header(skb, transport_offset); | ||
637 | skb_set_network_header(skb, network_offset); | ||
638 | |||
639 | return sk; | ||
640 | } | ||
641 | |||
586 | /* | 642 | /* |
587 | * This routine is called by the ICMP module when it gets some | 643 | * This routine is called by the ICMP module when it gets some |
588 | * sort of error condition. If err < 0 then the socket should | 644 | * sort of error condition. If err < 0 then the socket should |
@@ -601,6 +657,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | |||
601 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); | 657 | struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); |
602 | const int type = icmp_hdr(skb)->type; | 658 | const int type = icmp_hdr(skb)->type; |
603 | const int code = icmp_hdr(skb)->code; | 659 | const int code = icmp_hdr(skb)->code; |
660 | bool tunnel = false; | ||
604 | struct sock *sk; | 661 | struct sock *sk; |
605 | int harderr; | 662 | int harderr; |
606 | int err; | 663 | int err; |
@@ -610,8 +667,15 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | |||
610 | iph->saddr, uh->source, skb->dev->ifindex, | 667 | iph->saddr, uh->source, skb->dev->ifindex, |
611 | inet_sdif(skb), udptable, NULL); | 668 | inet_sdif(skb), udptable, NULL); |
612 | if (!sk) { | 669 | if (!sk) { |
613 | __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); | 670 | /* No socket for error: try tunnels before discarding */ |
614 | return; /* No socket for error */ | 671 | if (static_branch_unlikely(&udp_encap_needed_key)) |
672 | sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb); | ||
673 | |||
674 | if (!sk) { | ||
675 | __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); | ||
676 | return; | ||
677 | } | ||
678 | tunnel = true; | ||
615 | } | 679 | } |
616 | 680 | ||
617 | err = 0; | 681 | err = 0; |
@@ -654,6 +718,10 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) | |||
654 | * RFC1122: OK. Passes ICMP errors back to application, as per | 718 | * RFC1122: OK. Passes ICMP errors back to application, as per |
655 | * 4.1.3.3. | 719 | * 4.1.3.3. |
656 | */ | 720 | */ |
721 | if (tunnel) { | ||
722 | /* ...not for tunnels though: we don't have a sending socket */ | ||
723 | goto out; | ||
724 | } | ||
657 | if (!inet->recverr) { | 725 | if (!inet->recverr) { |
658 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | 726 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) |
659 | goto out; | 727 | goto out; |
@@ -1891,13 +1959,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) | |||
1891 | return 0; | 1959 | return 0; |
1892 | } | 1960 | } |
1893 | 1961 | ||
1894 | DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); | ||
1895 | void udp_encap_enable(void) | ||
1896 | { | ||
1897 | static_branch_enable(&udp_encap_needed_key); | ||
1898 | } | ||
1899 | EXPORT_SYMBOL(udp_encap_enable); | ||
1900 | |||
1901 | /* returns: | 1962 | /* returns: |
1902 | * -1: error | 1963 | * -1: error |
1903 | * 0: success | 1964 | * 0: success |
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 6539ff15e9a3..d0c412fc56ad 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c | |||
@@ -68,6 +68,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, | |||
68 | 68 | ||
69 | udp_sk(sk)->encap_type = cfg->encap_type; | 69 | udp_sk(sk)->encap_type = cfg->encap_type; |
70 | udp_sk(sk)->encap_rcv = cfg->encap_rcv; | 70 | udp_sk(sk)->encap_rcv = cfg->encap_rcv; |
71 | udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; | ||
71 | udp_sk(sk)->encap_destroy = cfg->encap_destroy; | 72 | udp_sk(sk)->encap_destroy = cfg->encap_destroy; |
72 | udp_sk(sk)->gro_receive = cfg->gro_receive; | 73 | udp_sk(sk)->gro_receive = cfg->gro_receive; |
73 | udp_sk(sk)->gro_complete = cfg->gro_complete; | 74 | udp_sk(sk)->gro_complete = cfg->gro_complete; |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c55698d19d68..1216c920f945 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -462,6 +462,61 @@ csum_copy_err: | |||
462 | goto try_again; | 462 | goto try_again; |
463 | } | 463 | } |
464 | 464 | ||
465 | DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); | ||
466 | void udpv6_encap_enable(void) | ||
467 | { | ||
468 | static_branch_enable(&udpv6_encap_needed_key); | ||
469 | } | ||
470 | EXPORT_SYMBOL(udpv6_encap_enable); | ||
471 | |||
472 | /* Try to match ICMP errors to UDP tunnels by looking up a socket without | ||
473 | * reversing source and destination port: this will match tunnels that force the | ||
474 | * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that | ||
475 | * lwtunnels might actually break this assumption by being configured with | ||
476 | * different destination ports on endpoints; in this case we won't be able to | ||
477 | * trace ICMP messages back to them. | ||
478 | * | ||
479 | * Then ask the tunnel implementation to match the error against a valid | ||
480 | * association. | ||
481 | * | ||
482 | * Return the socket if we have a match. | ||
483 | */ | ||
484 | static struct sock *__udp6_lib_err_encap(struct net *net, | ||
485 | const struct ipv6hdr *hdr, int offset, | ||
486 | struct udphdr *uh, | ||
487 | struct udp_table *udptable, | ||
488 | struct sk_buff *skb) | ||
489 | { | ||
490 | int (*lookup)(struct sock *sk, struct sk_buff *skb); | ||
491 | int network_offset, transport_offset; | ||
492 | struct udp_sock *up; | ||
493 | struct sock *sk; | ||
494 | |||
495 | sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, | ||
496 | &hdr->saddr, uh->dest, | ||
497 | inet6_iif(skb), 0, udptable, skb); | ||
498 | if (!sk) | ||
499 | return NULL; | ||
500 | |||
501 | network_offset = skb_network_offset(skb); | ||
502 | transport_offset = skb_transport_offset(skb); | ||
503 | |||
504 | /* Network header needs to point to the outer IPv6 header inside ICMP */ | ||
505 | skb_reset_network_header(skb); | ||
506 | |||
507 | /* Transport header needs to point to the UDP header */ | ||
508 | skb_set_transport_header(skb, offset); | ||
509 | |||
510 | up = udp_sk(sk); | ||
511 | lookup = READ_ONCE(up->encap_err_lookup); | ||
512 | if (!lookup || lookup(sk, skb)) | ||
513 | sk = NULL; | ||
514 | |||
515 | skb_set_transport_header(skb, transport_offset); | ||
516 | skb_set_network_header(skb, network_offset); | ||
517 | return sk; | ||
518 | } | ||
519 | |||
465 | void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | 520 | void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, |
466 | u8 type, u8 code, int offset, __be32 info, | 521 | u8 type, u8 code, int offset, __be32 info, |
467 | struct udp_table *udptable) | 522 | struct udp_table *udptable) |
@@ -471,6 +526,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
471 | const struct in6_addr *saddr = &hdr->saddr; | 526 | const struct in6_addr *saddr = &hdr->saddr; |
472 | const struct in6_addr *daddr = &hdr->daddr; | 527 | const struct in6_addr *daddr = &hdr->daddr; |
473 | struct udphdr *uh = (struct udphdr *)(skb->data+offset); | 528 | struct udphdr *uh = (struct udphdr *)(skb->data+offset); |
529 | bool tunnel = false; | ||
474 | struct sock *sk; | 530 | struct sock *sk; |
475 | int harderr; | 531 | int harderr; |
476 | int err; | 532 | int err; |
@@ -479,9 +535,18 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
479 | sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, | 535 | sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, |
480 | inet6_iif(skb), inet6_sdif(skb), udptable, skb); | 536 | inet6_iif(skb), inet6_sdif(skb), udptable, skb); |
481 | if (!sk) { | 537 | if (!sk) { |
482 | __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), | 538 | /* No socket for error: try tunnels before discarding */ |
483 | ICMP6_MIB_INERRORS); | 539 | if (static_branch_unlikely(&udpv6_encap_needed_key)) { |
484 | return; | 540 | sk = __udp6_lib_err_encap(net, hdr, offset, uh, |
541 | udptable, skb); | ||
542 | } | ||
543 | |||
544 | if (!sk) { | ||
545 | __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), | ||
546 | ICMP6_MIB_INERRORS); | ||
547 | return; | ||
548 | } | ||
549 | tunnel = true; | ||
485 | } | 550 | } |
486 | 551 | ||
487 | harderr = icmpv6_err_convert(type, code, &err); | 552 | harderr = icmpv6_err_convert(type, code, &err); |
@@ -495,10 +560,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, | |||
495 | harderr = 1; | 560 | harderr = 1; |
496 | } | 561 | } |
497 | if (type == NDISC_REDIRECT) { | 562 | if (type == NDISC_REDIRECT) { |
498 | ip6_sk_redirect(skb, sk); | 563 | if (tunnel) { |
564 | ip6_redirect(skb, sock_net(sk), inet6_iif(skb), | ||
565 | sk->sk_mark, sk->sk_uid); | ||
566 | } else { | ||
567 | ip6_sk_redirect(skb, sk); | ||
568 | } | ||
499 | goto out; | 569 | goto out; |
500 | } | 570 | } |
501 | 571 | ||
572 | /* Tunnels don't have an application socket: don't pass errors back */ | ||
573 | if (tunnel) | ||
574 | goto out; | ||
575 | |||
502 | if (!np->recverr) { | 576 | if (!np->recverr) { |
503 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) | 577 | if (!harderr || sk->sk_state != TCP_ESTABLISHED) |
504 | goto out; | 578 | goto out; |
@@ -547,13 +621,6 @@ static __inline__ void udpv6_err(struct sk_buff *skb, | |||
547 | __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); | 621 | __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); |
548 | } | 622 | } |
549 | 623 | ||
550 | DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); | ||
551 | void udpv6_encap_enable(void) | ||
552 | { | ||
553 | static_branch_enable(&udpv6_encap_needed_key); | ||
554 | } | ||
555 | EXPORT_SYMBOL(udpv6_encap_enable); | ||
556 | |||
557 | static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) | 624 | static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) |
558 | { | 625 | { |
559 | struct udp_sock *up = udp_sk(sk); | 626 | struct udp_sock *up = udp_sk(sk); |