diff options
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 7 | ||||
-rw-r--r-- | Documentation/networking/vrf.txt | 7 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 11 | ||||
-rw-r--r-- | net/ipv4/udp.c | 27 | ||||
-rw-r--r-- | net/ipv6/udp.c | 27 |
6 files changed, 66 insertions, 17 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17f2e7791042..fc73eeb7b3b8 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -721,6 +721,13 @@ tcp_challenge_ack_limit - INTEGER | |||
721 | 721 | ||
722 | UDP variables: | 722 | UDP variables: |
723 | 723 | ||
724 | udp_l3mdev_accept - BOOLEAN | ||
725 | Enabling this option allows a "global" bound socket to work | ||
726 | across L3 master domains (e.g., VRFs), so that it can receive | ||
727 | packets regardless of the L3 domain in which they originated. | ||
728 | Only available when the kernel is compiled with | ||
729 | CONFIG_NET_L3_MASTER_DEV. | ||
730 | |||
724 | udp_mem - vector of 3 INTEGERs: min, pressure, max | 731 | udp_mem - vector of 3 INTEGERs: min, pressure, max |
725 | Number of pages allowed for queueing by all UDP sockets. | 732 | Number of pages allowed for queueing by all UDP sockets. |
726 | 733 | ||
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt index 755dab856392..3918dae964d4 100644 --- a/Documentation/networking/vrf.txt +++ b/Documentation/networking/vrf.txt | |||
@@ -98,10 +98,11 @@ VRF device: | |||
98 | 98 | ||
99 | or to specify the output device using cmsg and IP_PKTINFO. | 99 | or to specify the output device using cmsg and IP_PKTINFO. |
100 | 100 | ||
101 | TCP services running in the default VRF context (ie., not bound to any VRF | 101 | TCP & UDP services running in the default VRF context (i.e., not bound |
102 | device) can work across all VRF domains by enabling the tcp_l3mdev_accept | 102 | to any VRF device) can work across all VRF domains by enabling the |
103 | sysctl option: | 103 | tcp_l3mdev_accept and udp_l3mdev_accept sysctl options: |
104 | sysctl -w net.ipv4.tcp_l3mdev_accept=1 | 104 | sysctl -w net.ipv4.tcp_l3mdev_accept=1 |
105 | sysctl -w net.ipv4.udp_l3mdev_accept=1 | ||
105 | 106 | ||
106 | netfilter rules on the VRF device can be used to limit access to services | 107 | netfilter rules on the VRF device can be used to limit access to services |
107 | running in the default VRF context as well. | 108 | running in the default VRF context as well. |
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e365732b8051..622d2da27135 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h | |||
@@ -124,6 +124,10 @@ struct netns_ipv4 { | |||
124 | struct inet_timewait_death_row tcp_death_row; | 124 | struct inet_timewait_death_row tcp_death_row; |
125 | int sysctl_max_syn_backlog; | 125 | int sysctl_max_syn_backlog; |
126 | 126 | ||
127 | #ifdef CONFIG_NET_L3_MASTER_DEV | ||
128 | int sysctl_udp_l3mdev_accept; | ||
129 | #endif | ||
130 | |||
127 | int sysctl_igmp_max_memberships; | 131 | int sysctl_igmp_max_memberships; |
128 | int sysctl_igmp_max_msf; | 132 | int sysctl_igmp_max_msf; |
129 | int sysctl_igmp_llm_reports; | 133 | int sysctl_igmp_llm_reports; |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1b861997fdc5..d6880a6149ee 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -1012,6 +1012,17 @@ static struct ctl_table ipv4_net_table[] = { | |||
1012 | .mode = 0644, | 1012 | .mode = 0644, |
1013 | .proc_handler = ipv4_privileged_ports, | 1013 | .proc_handler = ipv4_privileged_ports, |
1014 | }, | 1014 | }, |
1015 | #ifdef CONFIG_NET_L3_MASTER_DEV | ||
1016 | { | ||
1017 | .procname = "udp_l3mdev_accept", | ||
1018 | .data = &init_net.ipv4.sysctl_udp_l3mdev_accept, | ||
1019 | .maxlen = sizeof(int), | ||
1020 | .mode = 0644, | ||
1021 | .proc_handler = proc_dointvec_minmax, | ||
1022 | .extra1 = &zero, | ||
1023 | .extra2 = &one, | ||
1024 | }, | ||
1025 | #endif | ||
1015 | { } | 1026 | { } |
1016 | }; | 1027 | }; |
1017 | 1028 | ||
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d6dddcf59e79..cf6ba3387401 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -134,6 +134,17 @@ EXPORT_SYMBOL(udp_memory_allocated); | |||
134 | #define MAX_UDP_PORTS 65536 | 134 | #define MAX_UDP_PORTS 65536 |
135 | #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) | 135 | #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN) |
136 | 136 | ||
137 | /* IPCB reference means this cannot be used from early demux */ | ||
138 | static bool udp_lib_exact_dif_match(struct net *net, struct sk_buff *skb) | ||
139 | { | ||
140 | #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) | ||
141 | if (!net->ipv4.sysctl_udp_l3mdev_accept && | ||
142 | skb && ipv4_l3mdev_skb(IPCB(skb)->flags)) | ||
143 | return true; | ||
144 | #endif | ||
145 | return false; | ||
146 | } | ||
147 | |||
137 | static int udp_lib_lport_inuse(struct net *net, __u16 num, | 148 | static int udp_lib_lport_inuse(struct net *net, __u16 num, |
138 | const struct udp_hslot *hslot, | 149 | const struct udp_hslot *hslot, |
139 | unsigned long *bitmap, | 150 | unsigned long *bitmap, |
@@ -369,7 +380,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) | |||
369 | 380 | ||
370 | static int compute_score(struct sock *sk, struct net *net, | 381 | static int compute_score(struct sock *sk, struct net *net, |
371 | __be32 saddr, __be16 sport, | 382 | __be32 saddr, __be16 sport, |
372 | __be32 daddr, unsigned short hnum, int dif) | 383 | __be32 daddr, unsigned short hnum, int dif, |
384 | bool exact_dif) | ||
373 | { | 385 | { |
374 | int score; | 386 | int score; |
375 | struct inet_sock *inet; | 387 | struct inet_sock *inet; |
@@ -400,7 +412,7 @@ static int compute_score(struct sock *sk, struct net *net, | |||
400 | score += 4; | 412 | score += 4; |
401 | } | 413 | } |
402 | 414 | ||
403 | if (sk->sk_bound_dev_if) { | 415 | if (sk->sk_bound_dev_if || exact_dif) { |
404 | if (sk->sk_bound_dev_if != dif) | 416 | if (sk->sk_bound_dev_if != dif) |
405 | return -1; | 417 | return -1; |
406 | score += 4; | 418 | score += 4; |
@@ -425,7 +437,7 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, | |||
425 | /* called with rcu_read_lock() */ | 437 | /* called with rcu_read_lock() */ |
426 | static struct sock *udp4_lib_lookup2(struct net *net, | 438 | static struct sock *udp4_lib_lookup2(struct net *net, |
427 | __be32 saddr, __be16 sport, | 439 | __be32 saddr, __be16 sport, |
428 | __be32 daddr, unsigned int hnum, int dif, | 440 | __be32 daddr, unsigned int hnum, int dif, bool exact_dif, |
429 | struct udp_hslot *hslot2, | 441 | struct udp_hslot *hslot2, |
430 | struct sk_buff *skb) | 442 | struct sk_buff *skb) |
431 | { | 443 | { |
@@ -437,7 +449,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, | |||
437 | badness = 0; | 449 | badness = 0; |
438 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { | 450 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { |
439 | score = compute_score(sk, net, saddr, sport, | 451 | score = compute_score(sk, net, saddr, sport, |
440 | daddr, hnum, dif); | 452 | daddr, hnum, dif, exact_dif); |
441 | if (score > badness) { | 453 | if (score > badness) { |
442 | reuseport = sk->sk_reuseport; | 454 | reuseport = sk->sk_reuseport; |
443 | if (reuseport) { | 455 | if (reuseport) { |
@@ -472,6 +484,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
472 | unsigned short hnum = ntohs(dport); | 484 | unsigned short hnum = ntohs(dport); |
473 | unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); | 485 | unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); |
474 | struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; | 486 | struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; |
487 | bool exact_dif = udp_lib_exact_dif_match(net, skb); | ||
475 | int score, badness, matches = 0, reuseport = 0; | 488 | int score, badness, matches = 0, reuseport = 0; |
476 | u32 hash = 0; | 489 | u32 hash = 0; |
477 | 490 | ||
@@ -484,7 +497,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
484 | 497 | ||
485 | result = udp4_lib_lookup2(net, saddr, sport, | 498 | result = udp4_lib_lookup2(net, saddr, sport, |
486 | daddr, hnum, dif, | 499 | daddr, hnum, dif, |
487 | hslot2, skb); | 500 | exact_dif, hslot2, skb); |
488 | if (!result) { | 501 | if (!result) { |
489 | unsigned int old_slot2 = slot2; | 502 | unsigned int old_slot2 = slot2; |
490 | hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); | 503 | hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); |
@@ -499,7 +512,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, | |||
499 | 512 | ||
500 | result = udp4_lib_lookup2(net, saddr, sport, | 513 | result = udp4_lib_lookup2(net, saddr, sport, |
501 | daddr, hnum, dif, | 514 | daddr, hnum, dif, |
502 | hslot2, skb); | 515 | exact_dif, hslot2, skb); |
503 | } | 516 | } |
504 | return result; | 517 | return result; |
505 | } | 518 | } |
@@ -508,7 +521,7 @@ begin: | |||
508 | badness = 0; | 521 | badness = 0; |
509 | sk_for_each_rcu(sk, &hslot->head) { | 522 | sk_for_each_rcu(sk, &hslot->head) { |
510 | score = compute_score(sk, net, saddr, sport, | 523 | score = compute_score(sk, net, saddr, sport, |
511 | daddr, hnum, dif); | 524 | daddr, hnum, dif, exact_dif); |
512 | if (score > badness) { | 525 | if (score > badness) { |
513 | reuseport = sk->sk_reuseport; | 526 | reuseport = sk->sk_reuseport; |
514 | if (reuseport) { | 527 | if (reuseport) { |
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 05d69324862e..b4c6516a3a0c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c | |||
@@ -55,6 +55,16 @@ | |||
55 | #include <trace/events/skb.h> | 55 | #include <trace/events/skb.h> |
56 | #include "udp_impl.h" | 56 | #include "udp_impl.h" |
57 | 57 | ||
58 | static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb) | ||
59 | { | ||
60 | #if defined(CONFIG_NET_L3_MASTER_DEV) | ||
61 | if (!net->ipv4.sysctl_udp_l3mdev_accept && | ||
62 | skb && ipv6_l3mdev_skb(IP6CB(skb)->flags)) | ||
63 | return true; | ||
64 | #endif | ||
65 | return false; | ||
66 | } | ||
67 | |||
58 | static u32 udp6_ehashfn(const struct net *net, | 68 | static u32 udp6_ehashfn(const struct net *net, |
59 | const struct in6_addr *laddr, | 69 | const struct in6_addr *laddr, |
60 | const u16 lport, | 70 | const u16 lport, |
@@ -118,7 +128,7 @@ static void udp_v6_rehash(struct sock *sk) | |||
118 | static int compute_score(struct sock *sk, struct net *net, | 128 | static int compute_score(struct sock *sk, struct net *net, |
119 | const struct in6_addr *saddr, __be16 sport, | 129 | const struct in6_addr *saddr, __be16 sport, |
120 | const struct in6_addr *daddr, unsigned short hnum, | 130 | const struct in6_addr *daddr, unsigned short hnum, |
121 | int dif) | 131 | int dif, bool exact_dif) |
122 | { | 132 | { |
123 | int score; | 133 | int score; |
124 | struct inet_sock *inet; | 134 | struct inet_sock *inet; |
@@ -149,7 +159,7 @@ static int compute_score(struct sock *sk, struct net *net, | |||
149 | score++; | 159 | score++; |
150 | } | 160 | } |
151 | 161 | ||
152 | if (sk->sk_bound_dev_if) { | 162 | if (sk->sk_bound_dev_if || exact_dif) { |
153 | if (sk->sk_bound_dev_if != dif) | 163 | if (sk->sk_bound_dev_if != dif) |
154 | return -1; | 164 | return -1; |
155 | score++; | 165 | score++; |
@@ -165,7 +175,7 @@ static int compute_score(struct sock *sk, struct net *net, | |||
165 | static struct sock *udp6_lib_lookup2(struct net *net, | 175 | static struct sock *udp6_lib_lookup2(struct net *net, |
166 | const struct in6_addr *saddr, __be16 sport, | 176 | const struct in6_addr *saddr, __be16 sport, |
167 | const struct in6_addr *daddr, unsigned int hnum, int dif, | 177 | const struct in6_addr *daddr, unsigned int hnum, int dif, |
168 | struct udp_hslot *hslot2, | 178 | bool exact_dif, struct udp_hslot *hslot2, |
169 | struct sk_buff *skb) | 179 | struct sk_buff *skb) |
170 | { | 180 | { |
171 | struct sock *sk, *result; | 181 | struct sock *sk, *result; |
@@ -176,7 +186,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, | |||
176 | badness = -1; | 186 | badness = -1; |
177 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { | 187 | udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { |
178 | score = compute_score(sk, net, saddr, sport, | 188 | score = compute_score(sk, net, saddr, sport, |
179 | daddr, hnum, dif); | 189 | daddr, hnum, dif, exact_dif); |
180 | if (score > badness) { | 190 | if (score > badness) { |
181 | reuseport = sk->sk_reuseport; | 191 | reuseport = sk->sk_reuseport; |
182 | if (reuseport) { | 192 | if (reuseport) { |
@@ -212,6 +222,7 @@ struct sock *__udp6_lib_lookup(struct net *net, | |||
212 | unsigned short hnum = ntohs(dport); | 222 | unsigned short hnum = ntohs(dport); |
213 | unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); | 223 | unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); |
214 | struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; | 224 | struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; |
225 | bool exact_dif = udp6_lib_exact_dif_match(net, skb); | ||
215 | int score, badness, matches = 0, reuseport = 0; | 226 | int score, badness, matches = 0, reuseport = 0; |
216 | u32 hash = 0; | 227 | u32 hash = 0; |
217 | 228 | ||
@@ -223,7 +234,7 @@ struct sock *__udp6_lib_lookup(struct net *net, | |||
223 | goto begin; | 234 | goto begin; |
224 | 235 | ||
225 | result = udp6_lib_lookup2(net, saddr, sport, | 236 | result = udp6_lib_lookup2(net, saddr, sport, |
226 | daddr, hnum, dif, | 237 | daddr, hnum, dif, exact_dif, |
227 | hslot2, skb); | 238 | hslot2, skb); |
228 | if (!result) { | 239 | if (!result) { |
229 | unsigned int old_slot2 = slot2; | 240 | unsigned int old_slot2 = slot2; |
@@ -239,7 +250,8 @@ struct sock *__udp6_lib_lookup(struct net *net, | |||
239 | 250 | ||
240 | result = udp6_lib_lookup2(net, saddr, sport, | 251 | result = udp6_lib_lookup2(net, saddr, sport, |
241 | daddr, hnum, dif, | 252 | daddr, hnum, dif, |
242 | hslot2, skb); | 253 | exact_dif, hslot2, |
254 | skb); | ||
243 | } | 255 | } |
244 | return result; | 256 | return result; |
245 | } | 257 | } |
@@ -247,7 +259,8 @@ begin: | |||
247 | result = NULL; | 259 | result = NULL; |
248 | badness = -1; | 260 | badness = -1; |
249 | sk_for_each_rcu(sk, &hslot->head) { | 261 | sk_for_each_rcu(sk, &hslot->head) { |
250 | score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); | 262 | score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, |
263 | exact_dif); | ||
251 | if (score > badness) { | 264 | if (score > badness) { |
252 | reuseport = sk->sk_reuseport; | 265 | reuseport = sk->sk_reuseport; |
253 | if (reuseport) { | 266 | if (reuseport) { |