From 84018f55ab883f03d41ec3c9ac7f0cc80830b20f Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 23 Apr 2012 03:35:26 +0000 Subject: netfilter: ip6_tables: add flags parameter to ipv6_find_hdr() This patch adds the flags parameter to ipv6_find_hdr. These flags allow us to: * know if this is a fragment. * stop at the AH header, so the information contained in that header can be used for some specific packet handling. This patch also adds the offset parameter for inspection of one inner IPv6 header that is contained in error messages. Signed-off-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6/ip6_tables.h | 7 +++++- net/ipv6/netfilter/ip6_tables.c | 36 ++++++++++++++++++++++++++----- net/ipv6/netfilter/ip6t_ah.c | 4 ++-- net/ipv6/netfilter/ip6t_frag.c | 4 ++-- net/ipv6/netfilter/ip6t_hbh.c | 4 ++-- net/ipv6/netfilter/ip6t_rt.c | 4 ++-- net/netfilter/xt_TPROXY.c | 4 ++-- net/netfilter/xt_socket.c | 4 ++-- 8 files changed, 49 insertions(+), 18 deletions(-) diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 1bc898b14a80..08c2cbbaa32b 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -298,9 +298,14 @@ ip6t_ext_hdr(u8 nexthdr) (nexthdr == IPPROTO_DSTOPTS); } +enum { + IP6T_FH_F_FRAG = (1 << 0), + IP6T_FH_F_AUTH = (1 << 1), +}; + /* find specified header and get offset to it */ extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, - int target, unsigned short *fragoff); + int target, unsigned short *fragoff, int *fragflg); #ifdef CONFIG_COMPAT #include diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d4e350f72bbb..308bdd651230 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -133,7 +133,7 @@ ip6_packet_match(const struct sk_buff *skb, int protohdr; unsigned short _frag_off; - protohdr = ipv6_find_hdr(skb, protoff, -1, 
&_frag_off); + protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off, NULL); if (protohdr < 0) { if (_frag_off == 0) *hotdrop = true; @@ -362,6 +362,7 @@ ip6t_do_table(struct sk_buff *skb, const struct xt_entry_match *ematch; IP_NF_ASSERT(e); + acpar.thoff = 0; if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) { no_match: @@ -2278,6 +2279,10 @@ static void __exit ip6_tables_fini(void) * if target < 0. "last header" is transport protocol header, ESP, or * "No next header". * + * Note that *offset is used as input/output parameter, and if it is not zero, + * then it must be a valid offset to an inner IPv6 header. This can be used + * to explore inner IPv6 header, e.g. ICMPv6 error messages. + * * If target header is found, its offset is set in *offset and return protocol * number. Otherwise, return -1. * @@ -2289,17 +2294,33 @@ static void __exit ip6_tables_fini(void) * *offset is meaningless and fragment offset is stored in *fragoff if fragoff * isn't NULL. * + * If flags is not NULL and it's a fragment, then the frag flag IP6T_FH_F_FRAG + * will be set. If the IP6T_FH_F_AUTH flag is set in *flags on entry and + * target < 0, then this function will stop at the AH header. 
*/ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, - int target, unsigned short *fragoff) + int target, unsigned short *fragoff, int *flags) { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; - unsigned int len = skb->len - start; + unsigned int len; if (fragoff) *fragoff = 0; + if (*offset) { + struct ipv6hdr _ip6, *ip6; + + ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); + if (!ip6 || (ip6->version != 6)) { + printk(KERN_ERR "IPv6 header not found\n"); + return -EBADMSG; + } + start = *offset + sizeof(struct ipv6hdr); + nexthdr = ip6->nexthdr; + } + len = skb->len - start; + while (nexthdr != target) { struct ipv6_opt_hdr _hdr, *hp; unsigned int hdrlen; @@ -2316,6 +2337,9 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, if (nexthdr == NEXTHDR_FRAGMENT) { unsigned short _frag_off; __be16 *fp; + + if (flags) /* Indicate that this is a fragment */ + *flags |= IP6T_FH_F_FRAG; fp = skb_header_pointer(skb, start+offsetof(struct frag_hdr, frag_off), @@ -2336,9 +2360,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, return -ENOENT; } hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) + } else if (nexthdr == NEXTHDR_AUTH) { + if (flags && (*flags & IP6T_FH_F_AUTH) && (target < 0)) + break; hdrlen = (hp->hdrlen + 2) << 2; - else + } else hdrlen = ipv6_optlen(hp); nexthdr = hp->nexthdr; diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 89cccc5a9c92..04099ab7d2e3 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -41,11 +41,11 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par) struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; const struct ip6t_ah *ahinfo = par->matchinfo; - unsigned int ptr; + unsigned int ptr = 0; unsigned int hdrlen = 0; int err; - err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL); + err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL, 
NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index eda898fda6ca..3b5735e56bfe 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -40,10 +40,10 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par) struct frag_hdr _frag; const struct frag_hdr *fh; const struct ip6t_frag *fraginfo = par->matchinfo; - unsigned int ptr; + unsigned int ptr = 0; int err; - err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL); + err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 59df051eaef6..01df142bb027 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -50,7 +50,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par) const struct ipv6_opt_hdr *oh; const struct ip6t_opts *optinfo = par->matchinfo; unsigned int temp; - unsigned int ptr; + unsigned int ptr = 0; unsigned int hdrlen = 0; bool ret = false; u8 _opttype; @@ -62,7 +62,7 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par) err = ipv6_find_hdr(skb, &ptr, (par->match == &hbh_mt6_reg[0]) ? 
- NEXTHDR_HOP : NEXTHDR_DEST, NULL); + NEXTHDR_HOP : NEXTHDR_DEST, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index d8488c50a8e0..2c99b94eeca3 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -42,14 +42,14 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) const struct ipv6_rt_hdr *rh; const struct ip6t_rt *rtinfo = par->matchinfo; unsigned int temp; - unsigned int ptr; + unsigned int ptr = 0; unsigned int hdrlen = 0; bool ret = false; struct in6_addr _addr; const struct in6_addr *ap; int err; - err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); + err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 35a959a096e0..146033a86de8 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -282,10 +282,10 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par) struct sock *sk; const struct in6_addr *laddr; __be16 lport; - int thoff; + int thoff = 0; int tproto; - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); return NF_DROP; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 72bb07f57f97..9ea482d08cf7 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -263,10 +263,10 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) struct sock *sk; struct in6_addr *daddr, *saddr; __be16 dport, sport; - int thoff, tproto; + int thoff = 0, tproto; const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; - tproto = ipv6_find_hdr(skb, &thoff, -1, NULL); + tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) { 
pr_debug("unable to find transport header in IPv6 packet, dropping\n"); return NF_DROP; -- cgit v1.2.2 From cf308a1fae432f315989e2da6878bfaa3daa22b1 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Wed, 2 May 2012 07:49:47 +0000 Subject: netfilter: add xt_hmark target for hash-based skb marking The target allows you to create rules in the "raw" and "mangle" tables which set the skbuff mark by means of hash calculation within a given range. The nfmark can influence the routing method (see "Use netfilter MARK value as routing key") and can also be used by other subsystems to change their behaviour. [ Part of this patch has been refactored and modified by Pablo Neira Ayuso ] Signed-off-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_HMARK.h | 45 +++++ net/netfilter/Kconfig | 15 ++ net/netfilter/Makefile | 1 + net/netfilter/xt_HMARK.c | 362 +++++++++++++++++++++++++++++++++++++ 4 files changed, 423 insertions(+) create mode 100644 include/linux/netfilter/xt_HMARK.h create mode 100644 net/netfilter/xt_HMARK.c diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h new file mode 100644 index 000000000000..abb1650940d2 --- /dev/null +++ b/include/linux/netfilter/xt_HMARK.h @@ -0,0 +1,45 @@ +#ifndef XT_HMARK_H_ +#define XT_HMARK_H_ + +#include + +enum { + XT_HMARK_SADDR_MASK, + XT_HMARK_DADDR_MASK, + XT_HMARK_SPI, + XT_HMARK_SPI_MASK, + XT_HMARK_SPORT, + XT_HMARK_DPORT, + XT_HMARK_SPORT_MASK, + XT_HMARK_DPORT_MASK, + XT_HMARK_PROTO_MASK, + XT_HMARK_RND, + XT_HMARK_MODULUS, + XT_HMARK_OFFSET, + XT_HMARK_CT, + XT_HMARK_METHOD_L3, + XT_HMARK_METHOD_L3_4, +}; +#define XT_HMARK_FLAG(flag) (1 << flag) + +union hmark_ports { + struct { + __u16 src; + __u16 dst; + } p16; + __u32 v32; +}; + +struct xt_hmark_info { + union nf_inet_addr src_mask; + union nf_inet_addr dst_mask; + union hmark_ports port_mask; + union hmark_ports port_set; + __u32 flags; + __u16 proto_mask; + __u32 hashrnd; + __u32 hmodulus; + 
__u32 hoffset; /* Mark offset to start from */ +}; + +#endif /* XT_HMARK_H_ */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0c6f67e8f2e5..209c1ed43368 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -509,6 +509,21 @@ config NETFILTER_XT_TARGET_HL since you can easily create immortal packets that loop forever on the network. +config NETFILTER_XT_TARGET_HMARK + tristate '"HMARK" target support' + depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) + depends on NETFILTER_ADVANCED + ---help--- + This option adds the "HMARK" target. + + The target allows you to create rules in the "raw" and "mangle" tables + which set the skbuff mark by means of hash calculation within a given + range. The nfmark can influence the routing method (see "Use netfilter + MARK value as routing key") and can also be used by other subsystems to + change their behaviour. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_IDLETIMER tristate "IDLETIMER target support" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index ca3676586f51..4e7960cc7b97 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o +obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c new file mode 100644 index 000000000000..32fbd735d02b --- /dev/null +++ b/net/netfilter/xt_HMARK.c @@ -0,0 +1,362 @@ +/* + * xt_HMARK - Netfilter module to set mark by means of hashing + * + * (C) 2012 by Hans Schillstrom + * (C) 2012 by Pablo Neira Ayuso + * + * This 
program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include +#include +#include + +#include +#include + +#include +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include +#endif +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +#include +#include +#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hans Schillstrom "); +MODULE_DESCRIPTION("Xtables: packet marking using hash calculation"); +MODULE_ALIAS("ipt_HMARK"); +MODULE_ALIAS("ip6t_HMARK"); + +struct hmark_tuple { + u32 src; + u32 dst; + union hmark_ports uports; + uint8_t proto; +}; + +static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask) +{ + return (addr32[0] & mask[0]) ^ + (addr32[1] & mask[1]) ^ + (addr32[2] & mask[2]) ^ + (addr32[3] & mask[3]); +} + +static inline u32 +hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask) +{ + switch (l3num) { + case AF_INET: + return *addr32 & *mask; + case AF_INET6: + return hmark_addr6_mask(addr32, mask); + } + return 0; +} + +static int +hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, + const struct xt_hmark_info *info) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conntrack_tuple *otuple; + struct nf_conntrack_tuple *rtuple; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return -1; + + otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all, + info->src_mask.all); + t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all, + info->dst_mask.all); + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) + return 0; + + t->proto = nf_ct_protonum(ct); + if (t->proto != IPPROTO_ICMP) { + t->uports.p16.src = otuple->src.u.all; + t->uports.p16.dst = rtuple->src.u.all; + t->uports.v32 = (t->uports.v32 & 
info->port_mask.v32) | + info->port_set.v32; + if (t->uports.p16.dst < t->uports.p16.src) + swap(t->uports.p16.dst, t->uports.p16.src); + } + + return 0; +#else + return -1; +#endif +} + +static inline u32 +hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) +{ + u32 hash; + + if (t->dst < t->src) + swap(t->src, t->dst); + + hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd); + hash = hash ^ (t->proto & info->proto_mask); + + return (hash % info->hmodulus) + info->hoffset; +} + +static void +hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, + struct hmark_tuple *t, const struct xt_hmark_info *info) +{ + int protoff; + + protoff = proto_ports_offset(t->proto); + if (protoff < 0) + return; + + nhoff += protoff; + if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) + return; + + t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | + info->port_set.v32; + + if (t->uports.p16.dst < t->uports.p16.src) + swap(t->uports.p16.dst, t->uports.p16.src); +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static int get_inner6_hdr(const struct sk_buff *skb, int *offset) +{ + struct icmp6hdr *icmp6h, _ih6; + + icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6); + if (icmp6h == NULL) + return 0; + + if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) { + *offset += sizeof(struct icmp6hdr); + return 1; + } + return 0; +} + +static int +hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, + const struct xt_hmark_info *info) +{ + struct ipv6hdr *ip6, _ip6; + int flag = IP6T_FH_F_AUTH; + unsigned int nhoff = 0; + u16 fragoff = 0; + int nexthdr; + + ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb)); + nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); + if (nexthdr < 0) + return 0; + /* No need to check for icmp errors on fragments */ + if ((flag & IP6T_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) + goto noicmp; + /* Use inner header in case of ICMP errors */ + if 
(get_inner6_hdr(skb, &nhoff)) { + ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6); + if (ip6 == NULL) + return -1; + /* If AH present, use SPI like in ESP. */ + flag = IP6T_FH_F_AUTH; + nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); + if (nexthdr < 0) + return -1; + } +noicmp: + t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all); + t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all); + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) + return 0; + + t->proto = nexthdr; + if (t->proto == IPPROTO_ICMPV6) + return 0; + + if (flag & IP6T_FH_F_FRAG) + return 0; + + hmark_set_tuple_ports(skb, nhoff, t, info); + return 0; +} + +static unsigned int +hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_hmark_info *info = par->targinfo; + struct hmark_tuple t; + + memset(&t, 0, sizeof(struct hmark_tuple)); + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { + if (hmark_ct_set_htuple(skb, &t, info) < 0) + return XT_CONTINUE; + } else { + if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0) + return XT_CONTINUE; + } + + skb->mark = hmark_hash(&t, info); + return XT_CONTINUE; +} +#endif + +static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff) +{ + const struct icmphdr *icmph; + struct icmphdr _ih; + + /* Not enough header? */ + icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih); + if (icmph == NULL && icmph->type > NR_ICMP_TYPES) + return 0; + + /* Error message? 
*/ + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_SOURCE_QUENCH && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB && + icmph->type != ICMP_REDIRECT) + return 0; + + *nhoff += iphsz + sizeof(_ih); + return 1; +} + +static int +hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, + const struct xt_hmark_info *info) +{ + struct iphdr *ip, _ip; + int nhoff = skb_network_offset(skb); + + ip = (struct iphdr *) (skb->data + nhoff); + if (ip->protocol == IPPROTO_ICMP) { + /* Use inner header in case of ICMP errors */ + if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) { + ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip); + if (ip == NULL) + return -1; + } + } + + t->src = (__force u32) ip->saddr; + t->dst = (__force u32) ip->daddr; + + t->src &= info->src_mask.ip; + t->dst &= info->dst_mask.ip; + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) + return 0; + + t->proto = ip->protocol; + + /* ICMP has no ports, skip */ + if (t->proto == IPPROTO_ICMP) + return 0; + + /* follow-up fragments don't contain ports, skip all fragments */ + if (ip->frag_off & htons(IP_MF | IP_OFFSET)) + return 0; + + hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info); + + return 0; +} + +static unsigned int +hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_hmark_info *info = par->targinfo; + struct hmark_tuple t; + + memset(&t, 0, sizeof(struct hmark_tuple)); + + if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { + if (hmark_ct_set_htuple(skb, &t, info) < 0) + return XT_CONTINUE; + } else { + if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0) + return XT_CONTINUE; + } + + skb->mark = hmark_hash(&t, info); + return XT_CONTINUE; +} + +static int hmark_tg_check(const struct xt_tgchk_param *par) +{ + const struct xt_hmark_info *info = par->targinfo; + + if (!info->hmodulus) { + pr_info("xt_HMARK: hash modulus can't be zero\n"); + return -EINVAL; + } + if (info->proto_mask && + 
(info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { + pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); + return -EINVAL; + } + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && + (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | + XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) { + pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n"); + return -EINVAL; + } + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && + (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | + XT_HMARK_FLAG(XT_HMARK_DPORT)))) { + pr_info("xt_HMARK: spi-set and port-set can't be combined\n"); + return -EINVAL; + } + return 0; +} + +static struct xt_target hmark_tg_reg[] __read_mostly = { + { + .name = "HMARK", + .family = NFPROTO_IPV4, + .target = hmark_tg_v4, + .targetsize = sizeof(struct xt_hmark_info), + .checkentry = hmark_tg_check, + .me = THIS_MODULE, + }, +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + { + .name = "HMARK", + .family = NFPROTO_IPV6, + .target = hmark_tg_v6, + .targetsize = sizeof(struct xt_hmark_info), + .checkentry = hmark_tg_check, + .me = THIS_MODULE, + }, +#endif +}; + +static int __init hmark_tg_init(void) +{ + return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); +} + +static void __exit hmark_tg_exit(void) +{ + xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); +} + +module_init(hmark_tg_init); +module_exit(hmark_tg_exit); -- cgit v1.2.2 From 7a909ac70f6b0823d9f23a43f19598d4b57ac901 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 May 2012 10:51:43 +0000 Subject: netfilter: limit, hashlimit: avoid duplicated inline credit_cap can be set to credit, which avoids inlining user2credits twice. Also, remove inline keyword and let compiler decide. 
old: 684 192 0 876 36c net/netfilter/xt_limit.o 4927 344 32 5303 14b7 net/netfilter/xt_hashlimit.o now: 668 192 0 860 35c net/netfilter/xt_limit.o 4793 344 32 5169 1431 net/netfilter/xt_hashlimit.o Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 8 +++----- net/netfilter/xt_limit.c | 5 ++--- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d95f9c963cde..2195eb0727a3 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -389,8 +389,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) /* Precision saver. */ -static inline u_int32_t -user2credits(u_int32_t user) +static u32 user2credits(u32 user) { /* If multiplying would overflow... */ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) @@ -400,7 +399,7 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; } -static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) { dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY; if (dh->rateinfo.credit > dh->rateinfo.credit_cap) @@ -535,8 +534,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) dh->rateinfo.prev = jiffies; dh->rateinfo.credit = user2credits(hinfo->cfg.avg * hinfo->cfg.burst); - dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); + dh->rateinfo.credit_cap = dh->rateinfo.credit; dh->rateinfo.cost = user2credits(hinfo->cfg.avg); } else { /* update expiration timeout */ diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 32b7a579a032..5c22ce8ab309 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -88,8 +88,7 @@ limit_mt(const struct sk_buff *skb, struct xt_action_param *par) } /* Precision saver. 
*/ -static u_int32_t -user2credits(u_int32_t user) +static u32 user2credits(u32 user) { /* If multiplying would overflow... */ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) @@ -123,7 +122,7 @@ static int limit_mt_check(const struct xt_mtchk_param *par) 128. */ priv->prev = jiffies; priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ - r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ + r->credit_cap = priv->credit; /* Credits full. */ r->cost = user2credits(r->avg); } return 0; -- cgit v1.2.2 From 817e076f61bca3d0270af60632d1fe07cd4919f1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 May 2012 10:51:44 +0000 Subject: netfilter: hashlimit: move rateinfo initialization to helper followup patch would bloat main match function too much. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2195eb0727a3..b6bbd0630e5f 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -407,6 +407,15 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) dh->rateinfo.prev = now; } +static void rateinfo_init(struct dsthash_ent *dh, + struct xt_hashlimit_htable *hinfo) +{ + dh->rateinfo.prev = jiffies; + dh->rateinfo.credit = user2credits(hinfo->cfg.avg * hinfo->cfg.burst); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + dh->rateinfo.credit_cap = dh->rateinfo.credit; +} + static inline __be32 maskl(__be32 a, unsigned int l) { return l ? 
htonl(ntohl(a) & ~0 << (32 - l)) : 0; @@ -531,11 +540,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) goto hotdrop; } dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - dh->rateinfo.prev = jiffies; - dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.credit_cap = dh->rateinfo.credit; - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + rateinfo_init(dh, hinfo); } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); -- cgit v1.2.2 From 0197dee7d3182bb6b6a21955860dfa14fa022d84 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 7 May 2012 10:51:45 +0000 Subject: netfilter: hashlimit: byte-based limit mode can be used e.g. for ingress traffic policing or to detect when a host/port consumes more bandwidth than expected. This is done by optionally making cost to mean "cost per 16-byte-chunk-of-data" instead of "cost per packet". Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_hashlimit.h | 10 ++- net/netfilter/xt_hashlimit.c | 116 +++++++++++++++++++++++++++------ 2 files changed, 106 insertions(+), 20 deletions(-) diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h index b1925b5925e9..05fe7993dd76 100644 --- a/include/linux/netfilter/xt_hashlimit.h +++ b/include/linux/netfilter/xt_hashlimit.h @@ -6,7 +6,11 @@ /* timings are in milliseconds. */ #define XT_HASHLIMIT_SCALE 10000 /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 - seconds, or one every 59 hours. */ + * seconds, or one packet every 59 hours. 
+ */ + +/* packet length accounting is done in 16-byte steps */ +#define XT_HASHLIMIT_BYTE_SHIFT 4 /* details of this structure hidden by the implementation */ struct xt_hashlimit_htable; @@ -17,6 +21,10 @@ enum { XT_HASHLIMIT_HASH_SIP = 1 << 2, XT_HASHLIMIT_HASH_SPT = 1 << 3, XT_HASHLIMIT_INVERT = 1 << 4, + XT_HASHLIMIT_BYTES = 1 << 5, +#ifdef __KERNEL__ + XT_HASHLIMIT_MAX = 1 << 6, +#endif }; struct hashlimit_cfg { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index b6bbd0630e5f..d0424f9621f2 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -388,6 +388,18 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) +/* in byte mode, the lowest possible rate is one packet/second. + * credit_cap is used as a counter that tells us how many times we can + * refill the "credits available" counter when it becomes empty. + */ +#define MAX_CPJ_BYTES (0xFFFFFFFF / HZ) +#define CREDITS_PER_JIFFY_BYTES POW2_BELOW32(MAX_CPJ_BYTES) + +static u32 xt_hashlimit_len_to_chunks(u32 len) +{ + return (len >> XT_HASHLIMIT_BYTE_SHIFT) + 1; +} + /* Precision saver. 
*/ static u32 user2credits(u32 user) { @@ -399,21 +411,53 @@ static u32 user2credits(u32 user) return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; } -static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) +static u32 user2credits_byte(u32 user) { - dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY; - if (dh->rateinfo.credit > dh->rateinfo.credit_cap) - dh->rateinfo.credit = dh->rateinfo.credit_cap; + u64 us = user; + us *= HZ * CREDITS_PER_JIFFY_BYTES; + return (u32) (us >> 32); +} + +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) +{ + unsigned long delta = now - dh->rateinfo.prev; + u32 cap; + + if (delta == 0) + return; + dh->rateinfo.prev = now; + + if (mode & XT_HASHLIMIT_BYTES) { + u32 tmp = dh->rateinfo.credit; + dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta; + cap = CREDITS_PER_JIFFY_BYTES * HZ; + if (tmp >= dh->rateinfo.credit) {/* overflow */ + dh->rateinfo.credit = cap; + return; + } + } else { + dh->rateinfo.credit += delta * CREDITS_PER_JIFFY; + cap = dh->rateinfo.credit_cap; + } + if (dh->rateinfo.credit > cap) + dh->rateinfo.credit = cap; } static void rateinfo_init(struct dsthash_ent *dh, struct xt_hashlimit_htable *hinfo) { dh->rateinfo.prev = jiffies; - dh->rateinfo.credit = user2credits(hinfo->cfg.avg * hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); - dh->rateinfo.credit_cap = dh->rateinfo.credit; + if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { + dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ; + dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg); + dh->rateinfo.credit_cap = hinfo->cfg.burst; + } else { + dh->rateinfo.credit = user2credits(hinfo->cfg.avg * + hinfo->cfg.burst); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + dh->rateinfo.credit_cap = dh->rateinfo.credit; + } } static inline __be32 maskl(__be32 a, unsigned int l) @@ -519,6 +563,21 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, return 0; } +static 
u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) +{ + u64 tmp = xt_hashlimit_len_to_chunks(len); + tmp = tmp * dh->rateinfo.cost; + + if (unlikely(tmp > CREDITS_PER_JIFFY_BYTES * HZ)) + tmp = CREDITS_PER_JIFFY_BYTES * HZ; + + if (dh->rateinfo.credit < tmp && dh->rateinfo.credit_cap) { + dh->rateinfo.credit_cap--; + dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ; + } + return (u32) tmp; +} + static bool hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) { @@ -527,6 +586,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; + u32 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; @@ -544,12 +604,17 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now); + rateinfo_recalc(dh, now, hinfo->cfg.mode); } - if (dh->rateinfo.credit >= dh->rateinfo.cost) { + if (info->cfg.mode & XT_HASHLIMIT_BYTES) + cost = hashlimit_byte_cost(skb->len, dh); + else + cost = dh->rateinfo.cost; + + if (dh->rateinfo.credit >= cost) { /* below the limit */ - dh->rateinfo.credit -= dh->rateinfo.cost; + dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); rcu_read_unlock_bh(); return !(info->cfg.mode & XT_HASHLIMIT_INVERT); @@ -571,14 +636,6 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) struct xt_hashlimit_mtinfo1 *info = par->matchinfo; int ret; - /* Check for overflow. 
*/ - if (info->cfg.burst == 0 || - user2credits(info->cfg.avg * info->cfg.burst) < - user2credits(info->cfg.avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->cfg.avg, info->cfg.burst); - return -ERANGE; - } if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) return -EINVAL; if (info->name[sizeof(info->name)-1] != '\0') @@ -591,6 +648,26 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } + if (info->cfg.mode >= XT_HASHLIMIT_MAX) { + pr_info("Unknown mode mask %X, kernel too old?\n", + info->cfg.mode); + return -EINVAL; + } + + /* Check for overflow. */ + if (info->cfg.mode & XT_HASHLIMIT_BYTES) { + if (user2credits_byte(info->cfg.avg) == 0) { + pr_info("overflow, rate too high: %u\n", info->cfg.avg); + return -EINVAL; + } + } else if (info->cfg.burst == 0 || + user2credits(info->cfg.avg * info->cfg.burst) < + user2credits(info->cfg.avg)) { + pr_info("overflow, try lower: %u/%u\n", + info->cfg.avg, info->cfg.burst); + return -ERANGE; + } + mutex_lock(&hashlimit_mutex); info->hinfo = htable_find_get(net, info->name, par->family); if (info->hinfo == NULL) { @@ -683,10 +760,11 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { int res; + const struct xt_hashlimit_htable *ht = s->private; spin_lock(&ent->lock); /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies); + rateinfo_recalc(ent, jiffies, ht->cfg.mode); switch (family) { case NFPROTO_IPV4: -- cgit v1.2.2