aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.txt12
-rw-r--r--Documentation/networking/vrf.txt13
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--include/net/raw.h1
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/raw.c28
-rw-r--r--net/ipv4/sysctl_net_ipv4.c11
7 files changed, 68 insertions, 2 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 32b21571adfe..aa9e6a331679 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -370,6 +370,7 @@ tcp_l3mdev_accept - BOOLEAN
370 derived from the listen socket to be bound to the L3 domain in 370 derived from the listen socket to be bound to the L3 domain in
371 which the packets originated. Only valid when the kernel was 371 which the packets originated. Only valid when the kernel was
372 compiled with CONFIG_NET_L3_MASTER_DEV. 372 compiled with CONFIG_NET_L3_MASTER_DEV.
373 Default: 0 (disabled)
373 374
374tcp_low_latency - BOOLEAN 375tcp_low_latency - BOOLEAN
375 This is a legacy option, it has no effect anymore. 376 This is a legacy option, it has no effect anymore.
@@ -773,6 +774,7 @@ udp_l3mdev_accept - BOOLEAN
773 being received regardless of the L3 domain in which they 774 being received regardless of the L3 domain in which they
774 originated. Only valid when the kernel was compiled with 775 originated. Only valid when the kernel was compiled with
775 CONFIG_NET_L3_MASTER_DEV. 776 CONFIG_NET_L3_MASTER_DEV.
777 Default: 0 (disabled)
776 778
777udp_mem - vector of 3 INTEGERs: min, pressure, max 779udp_mem - vector of 3 INTEGERs: min, pressure, max
778 Number of pages allowed for queueing by all UDP sockets. 780 Number of pages allowed for queueing by all UDP sockets.
@@ -799,6 +801,16 @@ udp_wmem_min - INTEGER
799 total pages of UDP sockets exceed udp_mem pressure. The unit is byte. 801 total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
800 Default: 4K 802 Default: 4K
801 803
804RAW variables:
805
806raw_l3mdev_accept - BOOLEAN
807 Enabling this option allows a "global" bound socket to work
808 across L3 master domains (e.g., VRFs) with packets capable of
809 being received regardless of the L3 domain in which they
810 originated. Only valid when the kernel was compiled with
811 CONFIG_NET_L3_MASTER_DEV.
812 Default: 1 (enabled)
813
802CIPSOv4 Variables: 814CIPSOv4 Variables:
803 815
804cipso_cache_enable - BOOLEAN 816cipso_cache_enable - BOOLEAN
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
index d4b129402d57..a5f103b083a0 100644
--- a/Documentation/networking/vrf.txt
+++ b/Documentation/networking/vrf.txt
@@ -111,9 +111,22 @@ the same port if they bind to an l3mdev.
111TCP & UDP services running in the default VRF context (i.e., not bound 111TCP & UDP services running in the default VRF context (i.e., not bound
112to any VRF device) can work across all VRF domains by enabling the 112to any VRF device) can work across all VRF domains by enabling the
113tcp_l3mdev_accept and udp_l3mdev_accept sysctl options: 113tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
114
114 sysctl -w net.ipv4.tcp_l3mdev_accept=1 115 sysctl -w net.ipv4.tcp_l3mdev_accept=1
115 sysctl -w net.ipv4.udp_l3mdev_accept=1 116 sysctl -w net.ipv4.udp_l3mdev_accept=1
116 117
118These options are disabled by default so that a socket in a VRF is only
119selected for packets in that VRF. There is a similar option for RAW
120sockets, which is enabled by default for reasons of backwards compatibility.
121This makes it possible to specify the output device with cmsg and IP_PKTINFO
122while using a socket not bound to the corresponding VRF. This allows e.g. older
123ping implementations to be run by specifying the device, without executing them
124in the VRF. This option can be disabled so that packets received in a VRF
125context are only handled by a raw socket bound to the VRF, and packets in the
126default VRF are only handled by a socket not bound to any VRF:
127
128 sysctl -w net.ipv4.raw_l3mdev_accept=0
129
117netfilter rules on the VRF device can be used to limit access to services 130netfilter rules on the VRF device can be used to limit access to services
118running in the default VRF context as well. 131running in the default VRF context as well.
119 132
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index e47503b4e4d1..104a6669e344 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -103,6 +103,9 @@ struct netns_ipv4 {
103 /* Shall we try to damage output packets if routing dev changes? */ 103 /* Shall we try to damage output packets if routing dev changes? */
104 int sysctl_ip_dynaddr; 104 int sysctl_ip_dynaddr;
105 int sysctl_ip_early_demux; 105 int sysctl_ip_early_demux;
106#ifdef CONFIG_NET_L3_MASTER_DEV
107 int sysctl_raw_l3mdev_accept;
108#endif
106 int sysctl_tcp_early_demux; 109 int sysctl_tcp_early_demux;
107 int sysctl_udp_early_demux; 110 int sysctl_udp_early_demux;
108 111
diff --git a/include/net/raw.h b/include/net/raw.h
index 9c9fa98a91a4..20ebf0b3dfa8 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v);
61 61
62int raw_hash_sk(struct sock *sk); 62int raw_hash_sk(struct sock *sk);
63void raw_unhash_sk(struct sock *sk); 63void raw_unhash_sk(struct sock *sk);
64void raw_init(void);
64 65
65struct raw_sock { 66struct raw_sock {
66 /* inet_sock has to be the first member */ 67 /* inet_sock has to be the first member */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1fbe2f815474..07749c5b0a50 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1964,6 +1964,8 @@ static int __init inet_init(void)
1964 /* Add UDP-Lite (RFC 3828) */ 1964 /* Add UDP-Lite (RFC 3828) */
1965 udplite4_register(); 1965 udplite4_register();
1966 1966
1967 raw_init();
1968
1967 ping_init(); 1969 ping_init();
1968 1970
1969 /* 1971 /*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8ca3eb06ba04..1ebd29abe79c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -805,7 +805,7 @@ out:
805 return copied; 805 return copied;
806} 806}
807 807
808static int raw_init(struct sock *sk) 808static int raw_sk_init(struct sock *sk)
809{ 809{
810 struct raw_sock *rp = raw_sk(sk); 810 struct raw_sock *rp = raw_sk(sk);
811 811
@@ -970,7 +970,7 @@ struct proto raw_prot = {
970 .connect = ip4_datagram_connect, 970 .connect = ip4_datagram_connect,
971 .disconnect = __udp_disconnect, 971 .disconnect = __udp_disconnect,
972 .ioctl = raw_ioctl, 972 .ioctl = raw_ioctl,
973 .init = raw_init, 973 .init = raw_sk_init,
974 .setsockopt = raw_setsockopt, 974 .setsockopt = raw_setsockopt,
975 .getsockopt = raw_getsockopt, 975 .getsockopt = raw_getsockopt,
976 .sendmsg = raw_sendmsg, 976 .sendmsg = raw_sendmsg,
@@ -1133,4 +1133,28 @@ void __init raw_proc_exit(void)
1133{ 1133{
1134 unregister_pernet_subsys(&raw_net_ops); 1134 unregister_pernet_subsys(&raw_net_ops);
1135} 1135}
1136
1137static void raw_sysctl_init_net(struct net *net)
1138{
1139#ifdef CONFIG_NET_L3_MASTER_DEV
1140 net->ipv4.sysctl_raw_l3mdev_accept = 1;
1141#endif
1142}
1143
1144static int __net_init raw_sysctl_init(struct net *net)
1145{
1146 raw_sysctl_init_net(net);
1147 return 0;
1148}
1149
1150static struct pernet_operations __net_initdata raw_sysctl_ops = {
1151 .init = raw_sysctl_init,
1152};
1153
1154void __init raw_init(void)
1155{
1156 raw_sysctl_init_net(&init_net);
1157 if (register_pernet_subsys(&raw_sysctl_ops))
1158 panic("RAW: failed to init sysctl parameters.\n");
1159}
1136#endif /* CONFIG_PROC_FS */ 1160#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 891ed2f91467..ba0fc4b18465 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -602,6 +602,17 @@ static struct ctl_table ipv4_net_table[] = {
602 .mode = 0644, 602 .mode = 0644,
603 .proc_handler = ipv4_ping_group_range, 603 .proc_handler = ipv4_ping_group_range,
604 }, 604 },
605#ifdef CONFIG_NET_L3_MASTER_DEV
606 {
607 .procname = "raw_l3mdev_accept",
608 .data = &init_net.ipv4.sysctl_raw_l3mdev_accept,
609 .maxlen = sizeof(int),
610 .mode = 0644,
611 .proc_handler = proc_dointvec_minmax,
612 .extra1 = &zero,
613 .extra2 = &one,
614 },
615#endif
605 { 616 {
606 .procname = "tcp_ecn", 617 .procname = "tcp_ecn",
607 .data = &init_net.ipv4.sysctl_tcp_ecn, 618 .data = &init_net.ipv4.sysctl_tcp_ecn,