aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Lorenzo Colitti <lorenzo@google.com> 2014-05-13 13:17:33 -0400
committer David S. Miller <davem@davemloft.net> 2014-05-13 18:35:08 -0400
commit e110861f86094cd78cc85593b873970092deb43a (patch)
tree 535ecba8f65cefb68da0846ff54801bb32bf3c9e
parent 87e067cda6df60b55cea0239c2f3cee81e9f46df (diff)
net: add a sysctl to reflect the fwmark on replies
Kernel-originated IP packets that have no user socket associated with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.) are emitted with a mark of zero. Add a sysctl to make them have the same mark as the packet they are replying to.

This allows an administrator that wishes to do so to use mark-based routing, firewalling, etc. for these replies by marking the original packets inbound.

Tested using user-mode linux:
- ICMP/ICMPv6 echo replies and errors.
- TCP RST packets (IPv4 and IPv6).

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip.h 3
-rw-r--r-- include/net/ipv6.h 3
-rw-r--r-- include/net/netns/ipv4.h 2
-rw-r--r-- include/net/netns/ipv6.h 1
-rw-r--r-- net/ipv4/icmp.c 11
-rw-r--r-- net/ipv4/ip_output.c 3
-rw-r--r-- net/ipv4/sysctl_net_ipv4.c 7
-rw-r--r-- net/ipv6/icmp.c 6
-rw-r--r-- net/ipv6/sysctl_net_ipv6.c 7
-rw-r--r-- net/ipv6/tcp_ipv6.c 1
10 files changed, 41 insertions, 3 deletions
diff --git a/include/net/ip.h b/include/net/ip.h
index 55752985c144..14c50a1650ef 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -231,6 +231,9 @@ void ipfrag_init(void);
231 231
232void ip_static_sysctl_init(void); 232void ip_static_sysctl_init(void);
233 233
234#define IP4_REPLY_MARK(net, mark) \
235 ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
236
234static inline bool ip_is_fragment(const struct iphdr *iph) 237static inline bool ip_is_fragment(const struct iphdr *iph)
235{ 238{
236 return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; 239 return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 5b40ad297b8c..ba810d0546bc 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -113,6 +113,9 @@ struct frag_hdr {
113#define IP6_MF 0x0001 113#define IP6_MF 0x0001
114#define IP6_OFFSET 0xFFF8 114#define IP6_OFFSET 0xFFF8
115 115
116#define IP6_REPLY_MARK(net, mark) \
117 ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
118
116#include <net/sock.h> 119#include <net/sock.h>
117 120
118/* sysctls */ 121/* sysctls */
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b2704fd0ec80..a32fc4d705da 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -77,6 +77,8 @@ struct netns_ipv4 {
77 int sysctl_ip_no_pmtu_disc; 77 int sysctl_ip_no_pmtu_disc;
78 int sysctl_ip_fwd_use_pmtu; 78 int sysctl_ip_fwd_use_pmtu;
79 79
80 int sysctl_fwmark_reflect;
81
80 struct ping_group_range ping_group_range; 82 struct ping_group_range ping_group_range;
81 83
82 atomic_t dev_addr_genid; 84 atomic_t dev_addr_genid;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 21edaf1f7916..19d3446e59d2 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 {
30 int flowlabel_consistency; 30 int flowlabel_consistency;
31 int icmpv6_time; 31 int icmpv6_time;
32 int anycast_src_echo_reply; 32 int anycast_src_echo_reply;
33 int fwmark_reflect;
33}; 34};
34 35
35struct netns_ipv6 { 36struct netns_ipv6 {
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fe52666dc43c..79c3d947a481 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
337 struct sock *sk; 337 struct sock *sk;
338 struct inet_sock *inet; 338 struct inet_sock *inet;
339 __be32 daddr, saddr; 339 __be32 daddr, saddr;
340 u32 mark = IP4_REPLY_MARK(net, skb->mark);
340 341
341 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) 342 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb))
342 return; 343 return;
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
349 icmp_param->data.icmph.checksum = 0; 350 icmp_param->data.icmph.checksum = 0;
350 351
351 inet->tos = ip_hdr(skb)->tos; 352 inet->tos = ip_hdr(skb)->tos;
353 sk->sk_mark = mark;
352 daddr = ipc.addr = ip_hdr(skb)->saddr; 354 daddr = ipc.addr = ip_hdr(skb)->saddr;
353 saddr = fib_compute_spec_dst(skb); 355 saddr = fib_compute_spec_dst(skb);
354 ipc.opt = NULL; 356 ipc.opt = NULL;
@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
364 memset(&fl4, 0, sizeof(fl4)); 366 memset(&fl4, 0, sizeof(fl4));
365 fl4.daddr = daddr; 367 fl4.daddr = daddr;
366 fl4.saddr = saddr; 368 fl4.saddr = saddr;
369 fl4.flowi4_mark = mark;
367 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); 370 fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
368 fl4.flowi4_proto = IPPROTO_ICMP; 371 fl4.flowi4_proto = IPPROTO_ICMP;
369 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 372 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
382 struct flowi4 *fl4, 385 struct flowi4 *fl4,
383 struct sk_buff *skb_in, 386 struct sk_buff *skb_in,
384 const struct iphdr *iph, 387 const struct iphdr *iph,
385 __be32 saddr, u8 tos, 388 __be32 saddr, u8 tos, u32 mark,
386 int type, int code, 389 int type, int code,
387 struct icmp_bxm *param) 390 struct icmp_bxm *param)
388{ 391{
@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
394 fl4->daddr = (param->replyopts.opt.opt.srr ? 397 fl4->daddr = (param->replyopts.opt.opt.srr ?
395 param->replyopts.opt.opt.faddr : iph->saddr); 398 param->replyopts.opt.opt.faddr : iph->saddr);
396 fl4->saddr = saddr; 399 fl4->saddr = saddr;
400 fl4->flowi4_mark = mark;
397 fl4->flowi4_tos = RT_TOS(tos); 401 fl4->flowi4_tos = RT_TOS(tos);
398 fl4->flowi4_proto = IPPROTO_ICMP; 402 fl4->flowi4_proto = IPPROTO_ICMP;
399 fl4->fl4_icmp_type = type; 403 fl4->fl4_icmp_type = type;
@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
491 struct flowi4 fl4; 495 struct flowi4 fl4;
492 __be32 saddr; 496 __be32 saddr;
493 u8 tos; 497 u8 tos;
498 u32 mark;
494 struct net *net; 499 struct net *net;
495 struct sock *sk; 500 struct sock *sk;
496 501
@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
592 tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | 597 tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |
593 IPTOS_PREC_INTERNETCONTROL) : 598 IPTOS_PREC_INTERNETCONTROL) :
594 iph->tos; 599 iph->tos;
600 mark = IP4_REPLY_MARK(net, skb_in->mark);
595 601
596 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) 602 if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in))
597 goto out_unlock; 603 goto out_unlock;
@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
608 icmp_param->skb = skb_in; 614 icmp_param->skb = skb_in;
609 icmp_param->offset = skb_network_offset(skb_in); 615 icmp_param->offset = skb_network_offset(skb_in);
610 inet_sk(sk)->tos = tos; 616 inet_sk(sk)->tos = tos;
617 sk->sk_mark = mark;
611 ipc.addr = iph->saddr; 618 ipc.addr = iph->saddr;
612 ipc.opt = &icmp_param->replyopts.opt; 619 ipc.opt = &icmp_param->replyopts.opt;
613 ipc.tx_flags = 0; 620 ipc.tx_flags = 0;
614 ipc.ttl = 0; 621 ipc.ttl = 0;
615 ipc.tos = -1; 622 ipc.tos = -1;
616 623
617 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, 624 rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
618 type, code, icmp_param); 625 type, code, icmp_param);
619 if (IS_ERR(rt)) 626 if (IS_ERR(rt))
620 goto out_unlock; 627 goto out_unlock;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6aa4380fde1a..6e231ab58d65 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
1546 daddr = replyopts.opt.opt.faddr; 1546 daddr = replyopts.opt.opt.faddr;
1547 } 1547 }
1548 1548
1549 flowi4_init_output(&fl4, arg->bound_dev_if, 0, 1549 flowi4_init_output(&fl4, arg->bound_dev_if,
1550 IP4_REPLY_MARK(net, skb->mark),
1550 RT_TOS(arg->tos), 1551 RT_TOS(arg->tos),
1551 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, 1552 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
1552 ip_reply_arg_flowi_flags(arg), 1553 ip_reply_arg_flowi_flags(arg),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5cde8f263d40..f50d51850285 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = {
838 .mode = 0644, 838 .mode = 0644,
839 .proc_handler = proc_dointvec, 839 .proc_handler = proc_dointvec,
840 }, 840 },
841 {
842 .procname = "fwmark_reflect",
843 .data = &init_net.ipv4.sysctl_fwmark_reflect,
844 .maxlen = sizeof(int),
845 .mode = 0644,
846 .proc_handler = proc_dointvec,
847 },
841 { } 848 { }
842}; 849};
843 850
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8d3952796d39..f6c84a6eb238 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
400 int len; 400 int len;
401 int hlimit; 401 int hlimit;
402 int err = 0; 402 int err = 0;
403 u32 mark = IP6_REPLY_MARK(net, skb->mark);
403 404
404 if ((u8 *)hdr < skb->head || 405 if ((u8 *)hdr < skb->head ||
405 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) 406 (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
466 fl6.daddr = hdr->saddr; 467 fl6.daddr = hdr->saddr;
467 if (saddr) 468 if (saddr)
468 fl6.saddr = *saddr; 469 fl6.saddr = *saddr;
470 fl6.flowi6_mark = mark;
469 fl6.flowi6_oif = iif; 471 fl6.flowi6_oif = iif;
470 fl6.fl6_icmp_type = type; 472 fl6.fl6_icmp_type = type;
471 fl6.fl6_icmp_code = code; 473 fl6.fl6_icmp_code = code;
@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
474 sk = icmpv6_xmit_lock(net); 476 sk = icmpv6_xmit_lock(net);
475 if (sk == NULL) 477 if (sk == NULL)
476 return; 478 return;
479 sk->sk_mark = mark;
477 np = inet6_sk(sk); 480 np = inet6_sk(sk);
478 481
479 if (!icmpv6_xrlim_allow(sk, type, &fl6)) 482 if (!icmpv6_xrlim_allow(sk, type, &fl6))
@@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
551 int err = 0; 554 int err = 0;
552 int hlimit; 555 int hlimit;
553 u8 tclass; 556 u8 tclass;
557 u32 mark = IP6_REPLY_MARK(net, skb->mark);
554 558
555 saddr = &ipv6_hdr(skb)->daddr; 559 saddr = &ipv6_hdr(skb)->daddr;
556 560
@@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
569 fl6.saddr = *saddr; 573 fl6.saddr = *saddr;
570 fl6.flowi6_oif = skb->dev->ifindex; 574 fl6.flowi6_oif = skb->dev->ifindex;
571 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; 575 fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
576 fl6.flowi6_mark = mark;
572 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 577 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
573 578
574 sk = icmpv6_xmit_lock(net); 579 sk = icmpv6_xmit_lock(net);
575 if (sk == NULL) 580 if (sk == NULL)
576 return; 581 return;
582 sk->sk_mark = mark;
577 np = inet6_sk(sk); 583 np = inet6_sk(sk);
578 584
579 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) 585 if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 7f405a168822..058f3eca2e53 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = {
38 .mode = 0644, 38 .mode = 0644,
39 .proc_handler = proc_dointvec 39 .proc_handler = proc_dointvec
40 }, 40 },
41 {
42 .procname = "fwmark_reflect",
43 .data = &init_net.ipv6.sysctl.fwmark_reflect,
44 .maxlen = sizeof(int),
45 .mode = 0644,
46 .proc_handler = proc_dointvec
47 },
41 { } 48 { }
42}; 49};
43 50
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3a267bf14f2f..c54976a44425 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
812 fl6.flowi6_oif = inet6_iif(skb); 812 fl6.flowi6_oif = inet6_iif(skb);
813 else 813 else
814 fl6.flowi6_oif = oif; 814 fl6.flowi6_oif = oif;
815 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
815 fl6.fl6_dport = t1->dest; 816 fl6.fl6_dport = t1->dest;
816 fl6.fl6_sport = t1->source; 817 fl6.fl6_sport = t1->source;
817 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); 818 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));