diff options
author | Lorenzo Colitti <lorenzo@google.com> | 2014-05-13 13:17:33 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-05-13 18:35:08 -0400 |
commit | e110861f86094cd78cc85593b873970092deb43a (patch) | |
tree | 535ecba8f65cefb68da0846ff54801bb32bf3c9e | |
parent | 87e067cda6df60b55cea0239c2f3cee81e9f46df (diff) |
net: add a sysctl to reflect the fwmark on replies
Kernel-originated IP packets that have no user socket associated
with them (e.g., ICMP errors and echo replies, TCP RSTs, etc.)
are emitted with a mark of zero. Add a sysctl, fwmark_reflect
(one for IPv4 and one for IPv6), to make them have the same mark
as the packet they are replying to.
This allows an administrator who wishes to do so to use
mark-based routing, firewalling, etc. for these replies by
marking the original packets on ingress.
Tested using user-mode linux:
- ICMP/ICMPv6 echo replies and errors.
- TCP RST packets (IPv4 and IPv6).
Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ip.h | 3 | ||||
-rw-r--r-- | include/net/ipv6.h | 3 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 2 | ||||
-rw-r--r-- | include/net/netns/ipv6.h | 1 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 11 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 3 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 7 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 6 | ||||
-rw-r--r-- | net/ipv6/sysctl_net_ipv6.c | 7 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 1 |
10 files changed, 41 insertions, 3 deletions
diff --git a/include/net/ip.h b/include/net/ip.h index 55752985c144..14c50a1650ef 100644 --- a/include/net/ip.h +++ b/include/net/ip.h | |||
@@ -231,6 +231,9 @@ void ipfrag_init(void); | |||
231 | 231 | ||
232 | void ip_static_sysctl_init(void); | 232 | void ip_static_sysctl_init(void); |
233 | 233 | ||
234 | #define IP4_REPLY_MARK(net, mark) \ | ||
235 | ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) | ||
236 | |||
234 | static inline bool ip_is_fragment(const struct iphdr *iph) | 237 | static inline bool ip_is_fragment(const struct iphdr *iph) |
235 | { | 238 | { |
236 | return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; | 239 | return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; |
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5b40ad297b8c..ba810d0546bc 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h | |||
@@ -113,6 +113,9 @@ struct frag_hdr { | |||
113 | #define IP6_MF 0x0001 | 113 | #define IP6_MF 0x0001 |
114 | #define IP6_OFFSET 0xFFF8 | 114 | #define IP6_OFFSET 0xFFF8 |
115 | 115 | ||
116 | #define IP6_REPLY_MARK(net, mark) \ | ||
117 | ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) | ||
118 | |||
116 | #include <net/sock.h> | 119 | #include <net/sock.h> |
117 | 120 | ||
118 | /* sysctls */ | 121 | /* sysctls */ |
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index b2704fd0ec80..a32fc4d705da 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h | |||
@@ -77,6 +77,8 @@ struct netns_ipv4 { | |||
77 | int sysctl_ip_no_pmtu_disc; | 77 | int sysctl_ip_no_pmtu_disc; |
78 | int sysctl_ip_fwd_use_pmtu; | 78 | int sysctl_ip_fwd_use_pmtu; |
79 | 79 | ||
80 | int sysctl_fwmark_reflect; | ||
81 | |||
80 | struct ping_group_range ping_group_range; | 82 | struct ping_group_range ping_group_range; |
81 | 83 | ||
82 | atomic_t dev_addr_genid; | 84 | atomic_t dev_addr_genid; |
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 21edaf1f7916..19d3446e59d2 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h | |||
@@ -30,6 +30,7 @@ struct netns_sysctl_ipv6 { | |||
30 | int flowlabel_consistency; | 30 | int flowlabel_consistency; |
31 | int icmpv6_time; | 31 | int icmpv6_time; |
32 | int anycast_src_echo_reply; | 32 | int anycast_src_echo_reply; |
33 | int fwmark_reflect; | ||
33 | }; | 34 | }; |
34 | 35 | ||
35 | struct netns_ipv6 { | 36 | struct netns_ipv6 { |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fe52666dc43c..79c3d947a481 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -337,6 +337,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
337 | struct sock *sk; | 337 | struct sock *sk; |
338 | struct inet_sock *inet; | 338 | struct inet_sock *inet; |
339 | __be32 daddr, saddr; | 339 | __be32 daddr, saddr; |
340 | u32 mark = IP4_REPLY_MARK(net, skb->mark); | ||
340 | 341 | ||
341 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) | 342 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) |
342 | return; | 343 | return; |
@@ -349,6 +350,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
349 | icmp_param->data.icmph.checksum = 0; | 350 | icmp_param->data.icmph.checksum = 0; |
350 | 351 | ||
351 | inet->tos = ip_hdr(skb)->tos; | 352 | inet->tos = ip_hdr(skb)->tos; |
353 | sk->sk_mark = mark; | ||
352 | daddr = ipc.addr = ip_hdr(skb)->saddr; | 354 | daddr = ipc.addr = ip_hdr(skb)->saddr; |
353 | saddr = fib_compute_spec_dst(skb); | 355 | saddr = fib_compute_spec_dst(skb); |
354 | ipc.opt = NULL; | 356 | ipc.opt = NULL; |
@@ -364,6 +366,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) | |||
364 | memset(&fl4, 0, sizeof(fl4)); | 366 | memset(&fl4, 0, sizeof(fl4)); |
365 | fl4.daddr = daddr; | 367 | fl4.daddr = daddr; |
366 | fl4.saddr = saddr; | 368 | fl4.saddr = saddr; |
369 | fl4.flowi4_mark = mark; | ||
367 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); | 370 | fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); |
368 | fl4.flowi4_proto = IPPROTO_ICMP; | 371 | fl4.flowi4_proto = IPPROTO_ICMP; |
369 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); | 372 | security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); |
@@ -382,7 +385,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
382 | struct flowi4 *fl4, | 385 | struct flowi4 *fl4, |
383 | struct sk_buff *skb_in, | 386 | struct sk_buff *skb_in, |
384 | const struct iphdr *iph, | 387 | const struct iphdr *iph, |
385 | __be32 saddr, u8 tos, | 388 | __be32 saddr, u8 tos, u32 mark, |
386 | int type, int code, | 389 | int type, int code, |
387 | struct icmp_bxm *param) | 390 | struct icmp_bxm *param) |
388 | { | 391 | { |
@@ -394,6 +397,7 @@ static struct rtable *icmp_route_lookup(struct net *net, | |||
394 | fl4->daddr = (param->replyopts.opt.opt.srr ? | 397 | fl4->daddr = (param->replyopts.opt.opt.srr ? |
395 | param->replyopts.opt.opt.faddr : iph->saddr); | 398 | param->replyopts.opt.opt.faddr : iph->saddr); |
396 | fl4->saddr = saddr; | 399 | fl4->saddr = saddr; |
400 | fl4->flowi4_mark = mark; | ||
397 | fl4->flowi4_tos = RT_TOS(tos); | 401 | fl4->flowi4_tos = RT_TOS(tos); |
398 | fl4->flowi4_proto = IPPROTO_ICMP; | 402 | fl4->flowi4_proto = IPPROTO_ICMP; |
399 | fl4->fl4_icmp_type = type; | 403 | fl4->fl4_icmp_type = type; |
@@ -491,6 +495,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
491 | struct flowi4 fl4; | 495 | struct flowi4 fl4; |
492 | __be32 saddr; | 496 | __be32 saddr; |
493 | u8 tos; | 497 | u8 tos; |
498 | u32 mark; | ||
494 | struct net *net; | 499 | struct net *net; |
495 | struct sock *sk; | 500 | struct sock *sk; |
496 | 501 | ||
@@ -592,6 +597,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
592 | tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | | 597 | tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | |
593 | IPTOS_PREC_INTERNETCONTROL) : | 598 | IPTOS_PREC_INTERNETCONTROL) : |
594 | iph->tos; | 599 | iph->tos; |
600 | mark = IP4_REPLY_MARK(net, skb_in->mark); | ||
595 | 601 | ||
596 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) | 602 | if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) |
597 | goto out_unlock; | 603 | goto out_unlock; |
@@ -608,13 +614,14 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) | |||
608 | icmp_param->skb = skb_in; | 614 | icmp_param->skb = skb_in; |
609 | icmp_param->offset = skb_network_offset(skb_in); | 615 | icmp_param->offset = skb_network_offset(skb_in); |
610 | inet_sk(sk)->tos = tos; | 616 | inet_sk(sk)->tos = tos; |
617 | sk->sk_mark = mark; | ||
611 | ipc.addr = iph->saddr; | 618 | ipc.addr = iph->saddr; |
612 | ipc.opt = &icmp_param->replyopts.opt; | 619 | ipc.opt = &icmp_param->replyopts.opt; |
613 | ipc.tx_flags = 0; | 620 | ipc.tx_flags = 0; |
614 | ipc.ttl = 0; | 621 | ipc.ttl = 0; |
615 | ipc.tos = -1; | 622 | ipc.tos = -1; |
616 | 623 | ||
617 | rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, | 624 | rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, |
618 | type, code, icmp_param); | 625 | type, code, icmp_param); |
619 | if (IS_ERR(rt)) | 626 | if (IS_ERR(rt)) |
620 | goto out_unlock; | 627 | goto out_unlock; |
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 6aa4380fde1a..6e231ab58d65 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c | |||
@@ -1546,7 +1546,8 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr, | |||
1546 | daddr = replyopts.opt.opt.faddr; | 1546 | daddr = replyopts.opt.opt.faddr; |
1547 | } | 1547 | } |
1548 | 1548 | ||
1549 | flowi4_init_output(&fl4, arg->bound_dev_if, 0, | 1549 | flowi4_init_output(&fl4, arg->bound_dev_if, |
1550 | IP4_REPLY_MARK(net, skb->mark), | ||
1550 | RT_TOS(arg->tos), | 1551 | RT_TOS(arg->tos), |
1551 | RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, | 1552 | RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, |
1552 | ip_reply_arg_flowi_flags(arg), | 1553 | ip_reply_arg_flowi_flags(arg), |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5cde8f263d40..f50d51850285 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -838,6 +838,13 @@ static struct ctl_table ipv4_net_table[] = { | |||
838 | .mode = 0644, | 838 | .mode = 0644, |
839 | .proc_handler = proc_dointvec, | 839 | .proc_handler = proc_dointvec, |
840 | }, | 840 | }, |
841 | { | ||
842 | .procname = "fwmark_reflect", | ||
843 | .data = &init_net.ipv4.sysctl_fwmark_reflect, | ||
844 | .maxlen = sizeof(int), | ||
845 | .mode = 0644, | ||
846 | .proc_handler = proc_dointvec, | ||
847 | }, | ||
841 | { } | 848 | { } |
842 | }; | 849 | }; |
843 | 850 | ||
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 8d3952796d39..f6c84a6eb238 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c | |||
@@ -400,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) | |||
400 | int len; | 400 | int len; |
401 | int hlimit; | 401 | int hlimit; |
402 | int err = 0; | 402 | int err = 0; |
403 | u32 mark = IP6_REPLY_MARK(net, skb->mark); | ||
403 | 404 | ||
404 | if ((u8 *)hdr < skb->head || | 405 | if ((u8 *)hdr < skb->head || |
405 | (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) | 406 | (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) |
@@ -466,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) | |||
466 | fl6.daddr = hdr->saddr; | 467 | fl6.daddr = hdr->saddr; |
467 | if (saddr) | 468 | if (saddr) |
468 | fl6.saddr = *saddr; | 469 | fl6.saddr = *saddr; |
470 | fl6.flowi6_mark = mark; | ||
469 | fl6.flowi6_oif = iif; | 471 | fl6.flowi6_oif = iif; |
470 | fl6.fl6_icmp_type = type; | 472 | fl6.fl6_icmp_type = type; |
471 | fl6.fl6_icmp_code = code; | 473 | fl6.fl6_icmp_code = code; |
@@ -474,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) | |||
474 | sk = icmpv6_xmit_lock(net); | 476 | sk = icmpv6_xmit_lock(net); |
475 | if (sk == NULL) | 477 | if (sk == NULL) |
476 | return; | 478 | return; |
479 | sk->sk_mark = mark; | ||
477 | np = inet6_sk(sk); | 480 | np = inet6_sk(sk); |
478 | 481 | ||
479 | if (!icmpv6_xrlim_allow(sk, type, &fl6)) | 482 | if (!icmpv6_xrlim_allow(sk, type, &fl6)) |
@@ -551,6 +554,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) | |||
551 | int err = 0; | 554 | int err = 0; |
552 | int hlimit; | 555 | int hlimit; |
553 | u8 tclass; | 556 | u8 tclass; |
557 | u32 mark = IP6_REPLY_MARK(net, skb->mark); | ||
554 | 558 | ||
555 | saddr = &ipv6_hdr(skb)->daddr; | 559 | saddr = &ipv6_hdr(skb)->daddr; |
556 | 560 | ||
@@ -569,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) | |||
569 | fl6.saddr = *saddr; | 573 | fl6.saddr = *saddr; |
570 | fl6.flowi6_oif = skb->dev->ifindex; | 574 | fl6.flowi6_oif = skb->dev->ifindex; |
571 | fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; | 575 | fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; |
576 | fl6.flowi6_mark = mark; | ||
572 | security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); | 577 | security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); |
573 | 578 | ||
574 | sk = icmpv6_xmit_lock(net); | 579 | sk = icmpv6_xmit_lock(net); |
575 | if (sk == NULL) | 580 | if (sk == NULL) |
576 | return; | 581 | return; |
582 | sk->sk_mark = mark; | ||
577 | np = inet6_sk(sk); | 583 | np = inet6_sk(sk); |
578 | 584 | ||
579 | if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) | 585 | if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) |
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 7f405a168822..058f3eca2e53 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c | |||
@@ -38,6 +38,13 @@ static struct ctl_table ipv6_table_template[] = { | |||
38 | .mode = 0644, | 38 | .mode = 0644, |
39 | .proc_handler = proc_dointvec | 39 | .proc_handler = proc_dointvec |
40 | }, | 40 | }, |
41 | { | ||
42 | .procname = "fwmark_reflect", | ||
43 | .data = &init_net.ipv6.sysctl.fwmark_reflect, | ||
44 | .maxlen = sizeof(int), | ||
45 | .mode = 0644, | ||
46 | .proc_handler = proc_dointvec | ||
47 | }, | ||
41 | { } | 48 | { } |
42 | }; | 49 | }; |
43 | 50 | ||
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3a267bf14f2f..c54976a44425 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -812,6 +812,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, | |||
812 | fl6.flowi6_oif = inet6_iif(skb); | 812 | fl6.flowi6_oif = inet6_iif(skb); |
813 | else | 813 | else |
814 | fl6.flowi6_oif = oif; | 814 | fl6.flowi6_oif = oif; |
815 | fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); | ||
815 | fl6.fl6_dport = t1->dest; | 816 | fl6.fl6_dport = t1->dest; |
816 | fl6.fl6_sport = t1->source; | 817 | fl6.fl6_sport = t1->source; |
817 | security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); | 818 | security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); |