aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Lorenzo Colitti <lorenzo@google.com> 2014-05-13 13:17:35 -0400
committer David S. Miller <davem@davemloft.net> 2014-05-13 18:35:09 -0400
commit 84f39b08d7868ce10eeaf640627cb89777f0ae93 (patch)
tree b8040290a249b8242767c1bb7f433deb40b4e6e0
parent 1b3c61dc1aebf5d3d6c3981ba3eedc1e66f3ecda (diff)
net: support marking accepting TCP sockets
When using mark-based routing, sockets returned from accept() may need to be marked differently depending on the incoming connection request.

This is the case, for example, if different socket marks identify different networks: a listening socket may want to accept connections from all networks, but each connection should be marked with the network that the request came in on, so that subsequent packets are sent on the correct network.

This patch adds a sysctl to mark TCP sockets based on the fwmark of the incoming SYN packet. If enabled, and an unmarked socket receives a SYN, then the SYN packet's fwmark is written to the connection's inet_request_sock, and later written back to the accepted socket when the connection is established. If the socket already has a nonzero mark, then the behaviour is the same as it is today, i.e., the listening socket's fwmark is used.

Black-box tested using user-mode linux:

- IPv4/IPv6 SYN+ACK, FIN, etc. packets are routed based on the mark of the incoming SYN packet.
- The socket returned by accept() is marked with the mark of the incoming SYN packet.
- Tested with syncookies=1 and syncookies=2.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/inet_sock.h 10
-rw-r--r-- include/net/netns/ipv4.h 1
-rw-r--r-- net/ipv4/inet_connection_sock.c 6
-rw-r--r-- net/ipv4/syncookies.c 3
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 7
-rw-r--r-- net/ipv4/tcp_ipv4.c 1
-rw-r--r-- net/ipv6/inet6_connection_sock.c 2
-rw-r--r-- net/ipv6/syncookies.c 4
-rw-r--r-- net/ipv6/tcp_ipv6.c 1
9 files changed, 30 insertions, 5 deletions
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1833c3f389ee..b1edf17bec01 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -90,6 +90,7 @@ struct inet_request_sock {
90 kmemcheck_bitfield_end(flags); 90 kmemcheck_bitfield_end(flags);
91 struct ip_options_rcu *opt; 91 struct ip_options_rcu *opt;
92 struct sk_buff *pktopts; 92 struct sk_buff *pktopts;
93 u32 ir_mark;
93}; 94};
94 95
95static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) 96static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -97,6 +98,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
97 return (struct inet_request_sock *)sk; 98 return (struct inet_request_sock *)sk;
98} 99}
99 100
101static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
102{
103 if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) {
104 return skb->mark;
105 } else {
106 return sk->sk_mark;
107 }
108}
109
100struct inet_cork { 110struct inet_cork {
101 unsigned int flags; 111 unsigned int flags;
102 __be32 addr; 112 __be32 addr;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index a32fc4d705da..2f0cfad66666 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -78,6 +78,7 @@ struct netns_ipv4 {
78 int sysctl_ip_fwd_use_pmtu; 78 int sysctl_ip_fwd_use_pmtu;
79 79
80 int sysctl_fwmark_reflect; 80 int sysctl_fwmark_reflect;
81 int sysctl_tcp_fwmark_accept;
81 82
82 struct ping_group_range ping_group_range; 83 struct ping_group_range ping_group_range;
83 84
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a56b8e6e866a..12e502cbfdc7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -408,7 +408,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
408 struct net *net = sock_net(sk); 408 struct net *net = sock_net(sk);
409 int flags = inet_sk_flowi_flags(sk); 409 int flags = inet_sk_flowi_flags(sk);
410 410
411 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 411 flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
412 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 412 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
413 sk->sk_protocol, 413 sk->sk_protocol,
414 flags, 414 flags,
@@ -445,7 +445,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
445 445
446 rcu_read_lock(); 446 rcu_read_lock();
447 opt = rcu_dereference(newinet->inet_opt); 447 opt = rcu_dereference(newinet->inet_opt);
448 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 448 flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
449 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 449 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
450 sk->sk_protocol, inet_sk_flowi_flags(sk), 450 sk->sk_protocol, inet_sk_flowi_flags(sk),
451 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 451 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
@@ -680,6 +680,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
680 inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); 680 inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
681 newsk->sk_write_space = sk_stream_write_space; 681 newsk->sk_write_space = sk_stream_write_space;
682 682
683 newsk->sk_mark = inet_rsk(req)->ir_mark;
684
683 newicsk->icsk_retransmits = 0; 685 newicsk->icsk_retransmits = 0;
684 newicsk->icsk_backoff = 0; 686 newicsk->icsk_backoff = 0;
685 newicsk->icsk_probes_out = 0; 687 newicsk->icsk_probes_out = 0;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f2ed13c2125f..c86624b36a62 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
303 ireq->ir_rmt_port = th->source; 303 ireq->ir_rmt_port = th->source;
304 ireq->ir_loc_addr = ip_hdr(skb)->daddr; 304 ireq->ir_loc_addr = ip_hdr(skb)->daddr;
305 ireq->ir_rmt_addr = ip_hdr(skb)->saddr; 305 ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
306 ireq->ir_mark = inet_request_mark(sk, skb);
306 ireq->ecn_ok = ecn_ok; 307 ireq->ecn_ok = ecn_ok;
307 ireq->snd_wscale = tcp_opt.snd_wscale; 308 ireq->snd_wscale = tcp_opt.snd_wscale;
308 ireq->sack_ok = tcp_opt.sack_ok; 309 ireq->sack_ok = tcp_opt.sack_ok;
@@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
339 * hasn't changed since we received the original syn, but I see 340 * hasn't changed since we received the original syn, but I see
340 * no easy way to do this. 341 * no easy way to do this.
341 */ 342 */
342 flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, 343 flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
343 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, 344 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
344 inet_sk_flowi_flags(sk), 345 inet_sk_flowi_flags(sk),
345 (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, 346 (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index f50d51850285..a33b9fbc1d80 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -845,6 +845,13 @@ static struct ctl_table ipv4_net_table[] = {
845 .mode = 0644, 845 .mode = 0644,
846 .proc_handler = proc_dointvec, 846 .proc_handler = proc_dointvec,
847 }, 847 },
848 {
849 .procname = "tcp_fwmark_accept",
850 .data = &init_net.ipv4.sysctl_tcp_fwmark_accept,
851 .maxlen = sizeof(int),
852 .mode = 0644,
853 .proc_handler = proc_dointvec,
854 },
848 { } 855 { }
849}; 856};
850 857
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a2780e5334c9..77cccda1ad0c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1318,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1318 ireq->ir_rmt_addr = saddr; 1318 ireq->ir_rmt_addr = saddr;
1319 ireq->no_srccheck = inet_sk(sk)->transparent; 1319 ireq->no_srccheck = inet_sk(sk)->transparent;
1320 ireq->opt = tcp_v4_save_options(skb); 1320 ireq->opt = tcp_v4_save_options(skb);
1321 ireq->ir_mark = inet_request_mark(sk, skb);
1321 1322
1322 if (security_inet_conn_request(sk, skb, req)) 1323 if (security_inet_conn_request(sk, skb, req))
1323 goto drop_and_free; 1324 goto drop_and_free;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index d4ade34ab375..a245e5ddffbd 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
81 final_p = fl6_update_dst(fl6, np->opt, &final); 81 final_p = fl6_update_dst(fl6, np->opt, &final);
82 fl6->saddr = ireq->ir_v6_loc_addr; 82 fl6->saddr = ireq->ir_v6_loc_addr;
83 fl6->flowi6_oif = ireq->ir_iif; 83 fl6->flowi6_oif = ireq->ir_iif;
84 fl6->flowi6_mark = sk->sk_mark; 84 fl6->flowi6_mark = ireq->ir_mark;
85 fl6->fl6_dport = ireq->ir_rmt_port; 85 fl6->fl6_dport = ireq->ir_rmt_port;
86 fl6->fl6_sport = htons(ireq->ir_num); 86 fl6->fl6_sport = htons(ireq->ir_num);
87 security_req_classify_flow(req, flowi6_to_flowi(fl6)); 87 security_req_classify_flow(req, flowi6_to_flowi(fl6));
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb53a5e73c1a..a822b880689b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
216 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 216 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
217 ireq->ir_iif = inet6_iif(skb); 217 ireq->ir_iif = inet6_iif(skb);
218 218
219 ireq->ir_mark = inet_request_mark(sk, skb);
220
219 req->expires = 0UL; 221 req->expires = 0UL;
220 req->num_retrans = 0; 222 req->num_retrans = 0;
221 ireq->ecn_ok = ecn_ok; 223 ireq->ecn_ok = ecn_ok;
@@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
242 final_p = fl6_update_dst(&fl6, np->opt, &final); 244 final_p = fl6_update_dst(&fl6, np->opt, &final);
243 fl6.saddr = ireq->ir_v6_loc_addr; 245 fl6.saddr = ireq->ir_v6_loc_addr;
244 fl6.flowi6_oif = sk->sk_bound_dev_if; 246 fl6.flowi6_oif = sk->sk_bound_dev_if;
245 fl6.flowi6_mark = sk->sk_mark; 247 fl6.flowi6_mark = ireq->ir_mark;
246 fl6.fl6_dport = ireq->ir_rmt_port; 248 fl6.fl6_dport = ireq->ir_rmt_port;
247 fl6.fl6_sport = inet_sk(sk)->inet_sport; 249 fl6.fl6_sport = inet_sk(sk)->inet_sport;
248 security_req_classify_flow(req, flowi6_to_flowi(&fl6)); 250 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c54976a44425..f07b2abba359 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1034,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1034 TCP_ECN_create_request(req, skb, sock_net(sk)); 1034 TCP_ECN_create_request(req, skb, sock_net(sk));
1035 1035
1036 ireq->ir_iif = sk->sk_bound_dev_if; 1036 ireq->ir_iif = sk->sk_bound_dev_if;
1037 ireq->ir_mark = inet_request_mark(sk, skb);
1037 1038
1038 /* So that link locals have meaning */ 1039 /* So that link locals have meaning */
1039 if (!sk->sk_bound_dev_if && 1040 if (!sk->sk_bound_dev_if &&