aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-05-13 18:35:18 -0400
committerDavid S. Miller <davem@davemloft.net>2014-05-13 18:35:18 -0400
commitb6bd26c4de0141d0736a51487e4ca37390fcae03 (patch)
treeb8040290a249b8242767c1bb7f433deb40b4e6e0
parent87e067cda6df60b55cea0239c2f3cee81e9f46df (diff)
parent84f39b08d7868ce10eeaf640627cb89777f0ae93 (diff)
Merge branch 'inet_fwmark_reflect'
Lorenzo Colitti says: ==================== Make mark-based routing work better with multiple separate networks. Mark-based routing (ip rule fwmark 17 lookup 100) combined with either iptables marking (iptables -j MARK --set-mark 17) or application-based marking (the SO_MARK setsockopt) is a good way to deal with connecting simultaneously to multiple networks. Each network can be given a routing table, and ip rules can be configured to make different fwmarks select different networks. Applications can select networks by setting appropriate socket marks, and iptables rules can be used to handle non-aware applications, enforce policy, etc. This patch series improves functionality when mark-based routing is used in this way. Current behaviour has the following limitations: 1. Kernel-originated replies that are not associated with a socket always use a mark of zero. This means that, for example, when the kernel sends a ping reply or a TCP reset, it does not send it on the network from which it received the original packet. 2. Path MTU discovery, which is triggered by incoming packets, does not always work correctly, because the routing lookups it uses to clone routes do not take the fwmark into account and thus can happen in the wrong routing table. 3. Application-based marking works well for outbound connections, but does not work well for incoming connections. Marking a listening socket causes that socket to only accept connections from a given network, and sockets that are returned by accept() are not marked (and are thus not routed correctly). Limitations 1 and 2 are addressed by adding a new fwmark_reflect sysctl. This causes route lookups for kernel-generated replies and PMTUD to use the fwmark of the packet that caused them. Limitation 3 is addressed by adding a new tcp_fwmark_accept sysctl, which causes TCP sockets returned by accept() to be marked with the same mark as the initial SYN packet. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inet_sock.h10
-rw-r--r--include/net/ip.h3
-rw-r--r--include/net/ipv6.h3
-rw-r--r--include/net/netns/ipv4.h3
-rw-r--r--include/net/netns/ipv6.h1
-rw-r--r--net/ipv4/icmp.c11
-rw-r--r--net/ipv4/inet_connection_sock.c6
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/sysctl_net_ipv4.c14
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/syncookies.c4
-rw-r--r--net/ipv6/sysctl_net_ipv6.c7
-rw-r--r--net/ipv6/tcp_ipv6.c2
18 files changed, 79 insertions, 9 deletions
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 1833c3f389ee..b1edf17bec01 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -90,6 +90,7 @@ struct inet_request_sock {
90 kmemcheck_bitfield_end(flags); 90 kmemcheck_bitfield_end(flags);
91 struct ip_options_rcu *opt; 91 struct ip_options_rcu *opt;
92 struct sk_buff *pktopts; 92 struct sk_buff *pktopts;
93 u32 ir_mark;
93}; 94};
94 95
95static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) 96static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
@@ -97,6 +98,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
97 return (struct inet_request_sock *)sk; 98 return (struct inet_request_sock *)sk;
98} 99}
99 100
101static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb)
102{
103 if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) {
104 return skb->mark;
105 } else {
106 return sk->sk_mark;
107 }
108}
109
100struct inet_cork { 110struct inet_cork {
101 unsigned int flags; 111 unsigned int flags;
102 __be32 addr; 112 __be32 addr;
diff --git a/include/net/ip.h b/include/net/ip.h
index 55752985c144..14c50a1650ef 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -231,6 +231,9 @@ void ipfrag_init(void);
231 231
232void ip_static_sysctl_init(void); 232void ip_static_sysctl_init(void);
233 233
234#define IP4_REPLY_MARK(net, mark) \
235 ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
236
234static inline bool ip_is_fragment(const struct iphdr *iph) 237static inline bool ip_is_fragment(const struct iphdr *iph)
235{ 238{
236 return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; 239 return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 5b40ad297b8c..ba810d0546bc 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -113,6 +113,9 @@ struct frag_hdr {
113#define IP6_MF 0x0001 113#define IP6_MF 0x0001
114#define IP6_OFFSET 0xFFF8 114#define IP6_OFFSET 0xFFF8
115 115
116#define IP6_REPLY_MARK(net, mark) \
117 ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
118
116#include <net/sock.h> 119#include <net/sock.h>
117 120
118/* sysctls */ 121/* sysctls */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b2704fd0ec80..2f0cfad66666 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -77,6 +77,9 @@ struct netns_ipv4 {
77 int sysctl_ip_no_pmtu_disc; 77 int sysctl_ip_no_pmtu_disc;
78 int sysctl_ip_fwd_use_pmtu; 78 int sysctl_ip_fwd_use_pmtu;
79 79
80 int sysctl_fwmark_reflect;
81 int sysctl_tcp_fwmark_accept;
82
80 struct ping_group_range ping_group_range; 83 struct ping_group_range ping_group_range;
81 84
82 atomic_t dev_addr_genid; 85 atomic_t dev_addr_genid;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 21edaf1f7916..19d3446e59d2 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
30 int flowlabel_consistency; 30 int flowlabel_consistency;
31 int icmpv6_time; 31 int icmpv6_time;
32 int anycast_src_echo_reply; 32 int anycast_src_echo_reply;
33 int fwmark_reflect;
33}; 34};
34 35
35struct netns_ipv6 { 36struct netns_ipv6 {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fe52666dc43c..79c3d947a481 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
337 struct sock *sk; 337 struct sock *sk;
338 struct inet_sock *inet; 338 struct inet_sock *inet;
339 __be32 daddr, saddr; 339 __be32 daddr, saddr;
340 u32 mark = IP4_REPLY_MARK(net, skb->mark);
340 341
341 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) 342 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
342 return; 343 return;
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
349 icmp_param->data.icmph.checksum = 0; 350 icmp_param->data.icmph.checksum = 0;
350 351
351 inet->tos = ip_hdr(skb)->tos; 352 inet->tos = ip_hdr(skb)->tos;
353 sk->sk_mark = mark;
352 daddr = ipc.addr = ip_hdr(skb)->saddr; 354 daddr = ipc.addr = ip_hdr(skb)->saddr;
353 saddr = fib_compute_spec_dst(skb); 355 saddr = fib_compute_spec_dst(skb);
354 ipc.opt = NULL; 356 ipc.opt = NULL;
@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
364 memset(&fl4, 0, sizeof(fl4)); 366 memset(&fl4, 0, sizeof(fl4));
365 fl4.daddr = daddr; 367 fl4.daddr = daddr;
366 fl4.saddr = saddr; 368 fl4.saddr = saddr;
369 fl4.flowi4_mark = mark;
367 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 370 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
368 fl4.flowi4_proto = IPPROTO_ICMP; 371 fl4.flowi4_proto = IPPROTO_ICMP;
369 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 372 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
382 struct flowi4 *fl4, 385 struct flowi4 *fl4,
383 struct sk_buff *skb_in, 386 struct sk_buff *skb_in,
384 const struct iphdr *iph, 387 const struct iphdr *iph,
385 __be32 saddr, u8 tos, 388 __be32 saddr, u8 tos, u32 mark,
386 int type, int code, 389 int type, int code,
387 struct icmp_bxm *param) 390 struct icmp_bxm *param)
388{ 391{
@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
394 fl4->daddr = (param->replyopts.opt.opt.srr ? 397 fl4->daddr = (param->replyopts.opt.opt.srr ?
395 param->replyopts.opt.opt.faddr : iph->saddr); 398 param->replyopts.opt.opt.faddr : iph->saddr);
396 fl4->saddr = saddr; 399 fl4->saddr = saddr;
400 fl4->flowi4_mark = mark;
397 fl4->flowi4_tos = RT_TOS(tos); 401 fl4->flowi4_tos = RT_TOS(tos);
398 fl4->flowi4_proto = IPPROTO_ICMP; 402 fl4->flowi4_proto = IPPROTO_ICMP;
399 fl4->fl4_icmp_type = type; 403 fl4->fl4_icmp_type = type;
@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
491 struct flowi4 fl4; 495 struct flowi4 fl4;
492 __be32 saddr; 496 __be32 saddr;
493 u8 tos; 497 u8 tos;
498 u32 mark;
494 struct net *net; 499 struct net *net;
495 struct sock *sk; 500 struct sock *sk;
496 501
@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
592 tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | 597 tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
593 IPTOS_PREC_INTERNETCONTROL) : 598 IPTOS_PREC_INTERNETCONTROL) :
594 iph->tos; 599 iph->tos;
600 mark = IP4_REPLY_MARK(net, skb_in->mark);
595 601
596 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) 602 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
597 goto out_unlock; 603 goto out_unlock;
@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
608 icmp_param->skb = skb_in; 614 icmp_param->skb = skb_in;
609 icmp_param->offset = skb_network_offset(skb_in); 615 icmp_param->offset = skb_network_offset(skb_in);
610 inet_sk(sk)->tos = tos; 616 inet_sk(sk)->tos = tos;
617 sk->sk_mark = mark;
611 ipc.addr = iph->saddr; 618 ipc.addr = iph->saddr;
612 ipc.opt = &icmp_param->replyopts.opt; 619 ipc.opt = &icmp_param->replyopts.opt;
613 ipc.tx_flags = 0; 620 ipc.tx_flags = 0;
614 ipc.ttl = 0; 621 ipc.ttl = 0;
615 ipc.tos = -1; 622 ipc.tos = -1;
616 623
617 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, 624 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
618 type, code, icmp_param); 625 type, code, icmp_param);
619 if (IS_ERR(rt)) 626 if (IS_ERR(rt))
620 goto out_unlock; 627 goto out_unlock;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a56b8e6e866a..12e502cbfdc7 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -408,7 +408,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
408 struct net *net = sock_net(sk); 408 struct net *net = sock_net(sk);
409 int flags = inet_sk_flowi_flags(sk); 409 int flags = inet_sk_flowi_flags(sk);
410 410
411 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 411 flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
412 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 412 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
413 sk->sk_protocol, 413 sk->sk_protocol,
414 flags, 414 flags,
@@ -445,7 +445,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
445 445
446 rcu_read_lock(); 446 rcu_read_lock();
447 opt = rcu_dereference(newinet->inet_opt); 447 opt = rcu_dereference(newinet->inet_opt);
448 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, 448 flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
449 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, 449 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
450 sk->sk_protocol, inet_sk_flowi_flags(sk), 450 sk->sk_protocol, inet_sk_flowi_flags(sk),
451 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr, 451 (opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
@@ -680,6 +680,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
680 inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num); 680 inet_sk(newsk)->inet_sport = htons(inet_rsk(req)->ir_num);
681 newsk->sk_write_space = sk_stream_write_space; 681 newsk->sk_write_space = sk_stream_write_space;
682 682
683 newsk->sk_mark = inet_rsk(req)->ir_mark;
684
683 newicsk->icsk_retransmits = 0; 685 newicsk->icsk_retransmits = 0;
684 newicsk->icsk_backoff = 0; 686 newicsk->icsk_backoff = 0;
685 newicsk->icsk_probes_out = 0; 687 newicsk->icsk_probes_out = 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6aa4380fde1a..6e231ab58d65 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
1546 daddr = replyopts.opt.opt.faddr; 1546 daddr = replyopts.opt.opt.faddr;
1547 } 1547 }
1548 1548
1549 flowi4_init_output(&fl4, arg->bound_dev_if, 0, 1549 flowi4_init_output(&fl4, arg->bound_dev_if,
1550 IP4_REPLY_MARK(net, skb->mark),
1550 RT_TOS(arg->tos), 1551 RT_TOS(arg->tos),
1551 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, 1552 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
1552 ip_reply_arg_flowi_flags(arg), 1553 ip_reply_arg_flowi_flags(arg),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index db1e0da871f4..50e1e0feddfc 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -993,6 +993,9 @@ void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
993 struct flowi4 fl4; 993 struct flowi4 fl4;
994 struct rtable *rt; 994 struct rtable *rt;
995 995
996 if (!mark)
997 mark = IP4_REPLY_MARK(net, skb->mark);
998
996 __build_flow_key(&fl4, NULL, iph, oif, 999 __build_flow_key(&fl4, NULL, iph, oif,
997 RT_TOS(iph->tos), protocol, mark, flow_flags); 1000 RT_TOS(iph->tos), protocol, mark, flow_flags);
998 rt = __ip_route_output_key(net, &fl4); 1001 rt = __ip_route_output_key(net, &fl4);
@@ -1010,6 +1013,10 @@ static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
1010 struct rtable *rt; 1013 struct rtable *rt;
1011 1014
1012 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0); 1015 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1016
1017 if (!fl4.flowi4_mark)
1018 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1019
1013 rt = __ip_route_output_key(sock_net(sk), &fl4); 1020 rt = __ip_route_output_key(sock_net(sk), &fl4);
1014 if (!IS_ERR(rt)) { 1021 if (!IS_ERR(rt)) {
1015 __ip_rt_update_pmtu(rt, &fl4, mtu); 1022 __ip_rt_update_pmtu(rt, &fl4, mtu);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f2ed13c2125f..c86624b36a62 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -303,6 +303,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
303 ireq->ir_rmt_port = th->source; 303 ireq->ir_rmt_port = th->source;
304 ireq->ir_loc_addr = ip_hdr(skb)->daddr; 304 ireq->ir_loc_addr = ip_hdr(skb)->daddr;
305 ireq->ir_rmt_addr = ip_hdr(skb)->saddr; 305 ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
306 ireq->ir_mark = inet_request_mark(sk, skb);
306 ireq->ecn_ok = ecn_ok; 307 ireq->ecn_ok = ecn_ok;
307 ireq->snd_wscale = tcp_opt.snd_wscale; 308 ireq->snd_wscale = tcp_opt.snd_wscale;
308 ireq->sack_ok = tcp_opt.sack_ok; 309 ireq->sack_ok = tcp_opt.sack_ok;
@@ -339,7 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
339 * hasn't changed since we received the original syn, but I see 340 * hasn't changed since we received the original syn, but I see
340 * no easy way to do this. 341 * no easy way to do this.
341 */ 342 */
342 flowi4_init_output(&fl4, sk->sk_bound_dev_if, sk->sk_mark, 343 flowi4_init_output(&fl4, sk->sk_bound_dev_if, ireq->ir_mark,
343 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, 344 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
344 inet_sk_flowi_flags(sk), 345 inet_sk_flowi_flags(sk),
345 (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr, 346 (opt && opt->srr) ? opt->faddr : ireq->ir_rmt_addr,
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5cde8f263d40..a33b9fbc1d80 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -838,6 +838,20 @@ static struct ctl_table ipv4_net_table[] = {
838 .mode = 0644, 838 .mode = 0644,
839 .proc_handler = proc_dointvec, 839 .proc_handler = proc_dointvec,
840 }, 840 },
841 {
842 .procname = "fwmark_reflect",
843 .data = &init_net.ipv4.sysctl_fwmark_reflect,
844 .maxlen = sizeof(int),
845 .mode = 0644,
846 .proc_handler = proc_dointvec,
847 },
848 {
849 .procname = "tcp_fwmark_accept",
850 .data = &init_net.ipv4.sysctl_tcp_fwmark_accept,
851 .maxlen = sizeof(int),
852 .mode = 0644,
853 .proc_handler = proc_dointvec,
854 },
841 { } 855 { }
842}; 856};
843 857
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a2780e5334c9..77cccda1ad0c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1318,6 +1318,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1318 ireq->ir_rmt_addr = saddr; 1318 ireq->ir_rmt_addr = saddr;
1319 ireq->no_srccheck = inet_sk(sk)->transparent; 1319 ireq->no_srccheck = inet_sk(sk)->transparent;
1320 ireq->opt = tcp_v4_save_options(skb); 1320 ireq->opt = tcp_v4_save_options(skb);
1321 ireq->ir_mark = inet_request_mark(sk, skb);
1321 1322
1322 if (security_inet_conn_request(sk, skb, req)) 1323 if (security_inet_conn_request(sk, skb, req))
1323 goto drop_and_free; 1324 goto drop_and_free;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8d3952796d39..f6c84a6eb238 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
400 int len; 400 int len;
401 int hlimit; 401 int hlimit;
402 int err = 0; 402 int err = 0;
403 u32 mark = IP6_REPLY_MARK(net, skb->mark);
403 404
404 if ((u8 *)hdr < skb->head || 405 if ((u8 *)hdr < skb->head ||
405 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) 406 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
466 fl6.daddr = hdr->saddr; 467 fl6.daddr = hdr->saddr;
467 if (saddr) 468 if (saddr)
468 fl6.saddr = *saddr; 469 fl6.saddr = *saddr;
470 fl6.flowi6_mark = mark;
469 fl6.flowi6_oif = iif; 471 fl6.flowi6_oif = iif;
470 fl6.fl6_icmp_type = type; 472 fl6.fl6_icmp_type = type;
471 fl6.fl6_icmp_code = code; 473 fl6.fl6_icmp_code = code;
@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
474 sk = icmpv6_xmit_lock(net); 476 sk = icmpv6_xmit_lock(net);
475 if (sk == NULL) 477 if (sk == NULL)
476 return; 478 return;
479 sk->sk_mark = mark;
477 np = inet6_sk(sk); 480 np = inet6_sk(sk);
478 481
479 if (!icmpv6_xrlim_allow(sk, type, &fl6)) 482 if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
551 int err = 0; 554 int err = 0;
552 int hlimit; 555 int hlimit;
553 u8 tclass; 556 u8 tclass;
557 u32 mark = IP6_REPLY_MARK(net, skb->mark);
554 558
555 saddr = &ipv6_hdr(skb)->daddr; 559 saddr = &ipv6_hdr(skb)->daddr;
556 560
@@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
569 fl6.saddr = *saddr; 573 fl6.saddr = *saddr;
570 fl6.flowi6_oif = skb->dev->ifindex; 574 fl6.flowi6_oif = skb->dev->ifindex;
571 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; 575 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
576 fl6.flowi6_mark = mark;
572 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 577 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
573 578
574 sk = icmpv6_xmit_lock(net); 579 sk = icmpv6_xmit_lock(net);
575 if (sk == NULL) 580 if (sk == NULL)
576 return; 581 return;
582 sk->sk_mark = mark;
577 np = inet6_sk(sk); 583 np = inet6_sk(sk);
578 584
579 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) 585 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index d4ade34ab375..a245e5ddffbd 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -81,7 +81,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
81 final_p = fl6_update_dst(fl6, np->opt, &final); 81 final_p = fl6_update_dst(fl6, np->opt, &final);
82 fl6->saddr = ireq->ir_v6_loc_addr; 82 fl6->saddr = ireq->ir_v6_loc_addr;
83 fl6->flowi6_oif = ireq->ir_iif; 83 fl6->flowi6_oif = ireq->ir_iif;
84 fl6->flowi6_mark = sk->sk_mark; 84 fl6->flowi6_mark = ireq->ir_mark;
85 fl6->fl6_dport = ireq->ir_rmt_port; 85 fl6->fl6_dport = ireq->ir_rmt_port;
86 fl6->fl6_sport = htons(ireq->ir_num); 86 fl6->fl6_sport = htons(ireq->ir_num);
87 security_req_classify_flow(req, flowi6_to_flowi(fl6)); 87 security_req_classify_flow(req, flowi6_to_flowi(fl6));
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 004fffb6c221..f0a8ff9ed891 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1176,7 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1176 1176
1177 memset(&fl6, 0, sizeof(fl6)); 1177 memset(&fl6, 0, sizeof(fl6));
1178 fl6.flowi6_oif = oif; 1178 fl6.flowi6_oif = oif;
1179 fl6.flowi6_mark = mark; 1179 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1180 fl6.daddr = iph->daddr; 1180 fl6.daddr = iph->daddr;
1181 fl6.saddr = iph->saddr; 1181 fl6.saddr = iph->saddr;
1182 fl6.flowlabel = ip6_flowinfo(iph); 1182 fl6.flowlabel = ip6_flowinfo(iph);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index bb53a5e73c1a..a822b880689b 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
216 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 216 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
217 ireq->ir_iif = inet6_iif(skb); 217 ireq->ir_iif = inet6_iif(skb);
218 218
219 ireq->ir_mark = inet_request_mark(sk, skb);
220
219 req->expires = 0UL; 221 req->expires = 0UL;
220 req->num_retrans = 0; 222 req->num_retrans = 0;
221 ireq->ecn_ok = ecn_ok; 223 ireq->ecn_ok = ecn_ok;
@@ -242,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
242 final_p = fl6_update_dst(&fl6, np->opt, &final); 244 final_p = fl6_update_dst(&fl6, np->opt, &final);
243 fl6.saddr = ireq->ir_v6_loc_addr; 245 fl6.saddr = ireq->ir_v6_loc_addr;
244 fl6.flowi6_oif = sk->sk_bound_dev_if; 246 fl6.flowi6_oif = sk->sk_bound_dev_if;
245 fl6.flowi6_mark = sk->sk_mark; 247 fl6.flowi6_mark = ireq->ir_mark;
246 fl6.fl6_dport = ireq->ir_rmt_port; 248 fl6.fl6_dport = ireq->ir_rmt_port;
247 fl6.fl6_sport = inet_sk(sk)->inet_sport; 249 fl6.fl6_sport = inet_sk(sk)->inet_sport;
248 security_req_classify_flow(req, flowi6_to_flowi(&fl6)); 250 security_req_classify_flow(req, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7f405a168822..058f3eca2e53 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
38 .mode = 0644, 38 .mode = 0644,
39 .proc_handler = proc_dointvec 39 .proc_handler = proc_dointvec
40 }, 40 },
41 {
42 .procname = "fwmark_reflect",
43 .data = &init_net.ipv6.sysctl.fwmark_reflect,
44 .maxlen = sizeof(int),
45 .mode = 0644,
46 .proc_handler = proc_dointvec
47 },
41 { } 48 { }
42}; 49};
43 50
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3a267bf14f2f..f07b2abba359 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
812 fl6.flowi6_oif = inet6_iif(skb); 812 fl6.flowi6_oif = inet6_iif(skb);
813 else 813 else
814 fl6.flowi6_oif = oif; 814 fl6.flowi6_oif = oif;
815 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
815 fl6.fl6_dport = t1->dest; 816 fl6.fl6_dport = t1->dest;
816 fl6.fl6_sport = t1->source; 817 fl6.fl6_sport = t1->source;
817 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 818 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -1033,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1033 TCP_ECN_create_request(req, skb, sock_net(sk)); 1034 TCP_ECN_create_request(req, skb, sock_net(sk));
1034 1035
1035 ireq->ir_iif = sk->sk_bound_dev_if; 1036 ireq->ir_iif = sk->sk_bound_dev_if;
1037 ireq->ir_mark = inet_request_mark(sk, skb);
1036 1038
1037 /* So that link locals have meaning */ 1039 /* So that link locals have meaning */
1038 if (!sk->sk_bound_dev_if && 1040 if (!sk->sk_bound_dev_if &&