diff options
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 12 | ||||
-rw-r--r-- | Documentation/networking/vrf.txt | 13 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 3 | ||||
-rw-r--r-- | include/net/raw.h | 1 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 2 | ||||
-rw-r--r-- | net/ipv4/raw.c | 28 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 11 |
7 files changed, 68 insertions, 2 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 32b21571adfe..aa9e6a331679 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -370,6 +370,7 @@ tcp_l3mdev_accept - BOOLEAN | |||
370 | derived from the listen socket to be bound to the L3 domain in | 370 | derived from the listen socket to be bound to the L3 domain in |
371 | which the packets originated. Only valid when the kernel was | 371 | which the packets originated. Only valid when the kernel was |
372 | compiled with CONFIG_NET_L3_MASTER_DEV. | 372 | compiled with CONFIG_NET_L3_MASTER_DEV. |
373 | Default: 0 (disabled) | ||
373 | 374 | ||
374 | tcp_low_latency - BOOLEAN | 375 | tcp_low_latency - BOOLEAN |
375 | This is a legacy option, it has no effect anymore. | 376 | This is a legacy option, it has no effect anymore. |
@@ -773,6 +774,7 @@ udp_l3mdev_accept - BOOLEAN | |||
773 | being received regardless of the L3 domain in which they | 774 | being received regardless of the L3 domain in which they |
774 | originated. Only valid when the kernel was compiled with | 775 | originated. Only valid when the kernel was compiled with |
775 | CONFIG_NET_L3_MASTER_DEV. | 776 | CONFIG_NET_L3_MASTER_DEV. |
777 | Default: 0 (disabled) | ||
776 | 778 | ||
777 | udp_mem - vector of 3 INTEGERs: min, pressure, max | 779 | udp_mem - vector of 3 INTEGERs: min, pressure, max |
778 | Number of pages allowed for queueing by all UDP sockets. | 780 | Number of pages allowed for queueing by all UDP sockets. |
@@ -799,6 +801,16 @@ udp_wmem_min - INTEGER | |||
799 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. | 801 | total pages of UDP sockets exceed udp_mem pressure. The unit is byte. |
800 | Default: 4K | 802 | Default: 4K |
801 | 803 | ||
804 | RAW variables: | ||
805 | |||
806 | raw_l3mdev_accept - BOOLEAN | ||
807 | Enabling this option allows a "global" bound socket to work | ||
808 | across L3 master domains (e.g., VRFs) with packets capable of | ||
809 | being received regardless of the L3 domain in which they | ||
810 | originated. Only valid when the kernel was compiled with | ||
811 | CONFIG_NET_L3_MASTER_DEV. | ||
812 | Default: 1 (enabled) | ||
813 | |||
802 | CIPSOv4 Variables: | 814 | CIPSOv4 Variables: |
803 | 815 | ||
804 | cipso_cache_enable - BOOLEAN | 816 | cipso_cache_enable - BOOLEAN |
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt index d4b129402d57..a5f103b083a0 100644 --- a/Documentation/networking/vrf.txt +++ b/Documentation/networking/vrf.txt | |||
@@ -111,9 +111,22 @@ the same port if they bind to an l3mdev. | |||
111 | TCP & UDP services running in the default VRF context (ie., not bound | 111 | TCP & UDP services running in the default VRF context (ie., not bound |
112 | to any VRF device) can work across all VRF domains by enabling the | 112 | to any VRF device) can work across all VRF domains by enabling the |
113 | tcp_l3mdev_accept and udp_l3mdev_accept sysctl options: | 113 | tcp_l3mdev_accept and udp_l3mdev_accept sysctl options: |
114 | |||
114 | sysctl -w net.ipv4.tcp_l3mdev_accept=1 | 115 | sysctl -w net.ipv4.tcp_l3mdev_accept=1 |
115 | sysctl -w net.ipv4.udp_l3mdev_accept=1 | 116 | sysctl -w net.ipv4.udp_l3mdev_accept=1 |
116 | 117 | ||
118 | These options are disabled by default so that a socket in a VRF is only | ||
119 | selected for packets in that VRF. There is a similar option for RAW | ||
120 | sockets, which is enabled by default for reasons of backwards compatibility. | ||
121 | This makes it possible to specify the output device with cmsg and IP_PKTINFO | ||
122 | while using a socket not bound to the corresponding VRF. This allows e.g. older | ||
123 | ping implementations to be run by specifying the device, without executing them | ||
124 | in the VRF. This option can be disabled so that packets received in a VRF | ||
125 | context are only handled by a raw socket bound to the VRF, and packets in the | ||
126 | default VRF are only handled by a socket not bound to any VRF: | ||
127 | |||
128 | sysctl -w net.ipv4.raw_l3mdev_accept=0 | ||
129 | |||
117 | netfilter rules on the VRF device can be used to limit access to services | 130 | netfilter rules on the VRF device can be used to limit access to services |
118 | running in the default VRF context as well. | 131 | running in the default VRF context as well. |
119 | 132 | ||
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e47503b4e4d1..104a6669e344 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h | |||
@@ -103,6 +103,9 @@ struct netns_ipv4 { | |||
103 | /* Shall we try to damage output packets if routing dev changes? */ | 103 | /* Shall we try to damage output packets if routing dev changes? */ |
104 | int sysctl_ip_dynaddr; | 104 | int sysctl_ip_dynaddr; |
105 | int sysctl_ip_early_demux; | 105 | int sysctl_ip_early_demux; |
106 | #ifdef CONFIG_NET_L3_MASTER_DEV | ||
107 | int sysctl_raw_l3mdev_accept; | ||
108 | #endif | ||
106 | int sysctl_tcp_early_demux; | 109 | int sysctl_tcp_early_demux; |
107 | int sysctl_udp_early_demux; | 110 | int sysctl_udp_early_demux; |
108 | 111 | ||
diff --git a/include/net/raw.h b/include/net/raw.h index 9c9fa98a91a4..20ebf0b3dfa8 100644 --- a/include/net/raw.h +++ b/include/net/raw.h | |||
@@ -61,6 +61,7 @@ void raw_seq_stop(struct seq_file *seq, void *v); | |||
61 | 61 | ||
62 | int raw_hash_sk(struct sock *sk); | 62 | int raw_hash_sk(struct sock *sk); |
63 | void raw_unhash_sk(struct sock *sk); | 63 | void raw_unhash_sk(struct sock *sk); |
64 | void raw_init(void); | ||
64 | 65 | ||
65 | struct raw_sock { | 66 | struct raw_sock { |
66 | /* inet_sock has to be the first member */ | 67 | /* inet_sock has to be the first member */ |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1fbe2f815474..07749c5b0a50 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -1964,6 +1964,8 @@ static int __init inet_init(void) | |||
1964 | /* Add UDP-Lite (RFC 3828) */ | 1964 | /* Add UDP-Lite (RFC 3828) */ |
1965 | udplite4_register(); | 1965 | udplite4_register(); |
1966 | 1966 | ||
1967 | raw_init(); | ||
1968 | |||
1967 | ping_init(); | 1969 | ping_init(); |
1968 | 1970 | ||
1969 | /* | 1971 | /* |
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 8ca3eb06ba04..1ebd29abe79c 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c | |||
@@ -805,7 +805,7 @@ out: | |||
805 | return copied; | 805 | return copied; |
806 | } | 806 | } |
807 | 807 | ||
808 | static int raw_init(struct sock *sk) | 808 | static int raw_sk_init(struct sock *sk) |
809 | { | 809 | { |
810 | struct raw_sock *rp = raw_sk(sk); | 810 | struct raw_sock *rp = raw_sk(sk); |
811 | 811 | ||
@@ -970,7 +970,7 @@ struct proto raw_prot = { | |||
970 | .connect = ip4_datagram_connect, | 970 | .connect = ip4_datagram_connect, |
971 | .disconnect = __udp_disconnect, | 971 | .disconnect = __udp_disconnect, |
972 | .ioctl = raw_ioctl, | 972 | .ioctl = raw_ioctl, |
973 | .init = raw_init, | 973 | .init = raw_sk_init, |
974 | .setsockopt = raw_setsockopt, | 974 | .setsockopt = raw_setsockopt, |
975 | .getsockopt = raw_getsockopt, | 975 | .getsockopt = raw_getsockopt, |
976 | .sendmsg = raw_sendmsg, | 976 | .sendmsg = raw_sendmsg, |
@@ -1133,4 +1133,28 @@ void __init raw_proc_exit(void) | |||
1133 | { | 1133 | { |
1134 | unregister_pernet_subsys(&raw_net_ops); | 1134 | unregister_pernet_subsys(&raw_net_ops); |
1135 | } | 1135 | } |
1136 | |||
1137 | static void raw_sysctl_init_net(struct net *net) | ||
1138 | { | ||
1139 | #ifdef CONFIG_NET_L3_MASTER_DEV | ||
1140 | net->ipv4.sysctl_raw_l3mdev_accept = 1; | ||
1141 | #endif | ||
1142 | } | ||
1143 | |||
1144 | static int __net_init raw_sysctl_init(struct net *net) | ||
1145 | { | ||
1146 | raw_sysctl_init_net(net); | ||
1147 | return 0; | ||
1148 | } | ||
1149 | |||
1150 | static struct pernet_operations __net_initdata raw_sysctl_ops = { | ||
1151 | .init = raw_sysctl_init, | ||
1152 | }; | ||
1153 | |||
1154 | void __init raw_init(void) | ||
1155 | { | ||
1156 | raw_sysctl_init_net(&init_net); | ||
1157 | if (register_pernet_subsys(&raw_sysctl_ops)) | ||
1158 | panic("RAW: failed to init sysctl parameters.\n"); | ||
1159 | } | ||
1136 | #endif /* CONFIG_PROC_FS */ | 1160 | #endif /* CONFIG_PROC_FS */ |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 891ed2f91467..ba0fc4b18465 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -602,6 +602,17 @@ static struct ctl_table ipv4_net_table[] = { | |||
602 | .mode = 0644, | 602 | .mode = 0644, |
603 | .proc_handler = ipv4_ping_group_range, | 603 | .proc_handler = ipv4_ping_group_range, |
604 | }, | 604 | }, |
605 | #ifdef CONFIG_NET_L3_MASTER_DEV | ||
606 | { | ||
607 | .procname = "raw_l3mdev_accept", | ||
608 | .data = &init_net.ipv4.sysctl_raw_l3mdev_accept, | ||
609 | .maxlen = sizeof(int), | ||
610 | .mode = 0644, | ||
611 | .proc_handler = proc_dointvec_minmax, | ||
612 | .extra1 = &zero, | ||
613 | .extra2 = &one, | ||
614 | }, | ||
615 | #endif | ||
605 | { | 616 | { |
606 | .procname = "tcp_ecn", | 617 | .procname = "tcp_ecn", |
607 | .data = &init_net.ipv4.sysctl_tcp_ecn, | 618 | .data = &init_net.ipv4.sysctl_tcp_ecn, |