diff options
-rw-r--r-- | include/net/netfilter/nft_fib.h | 2 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_fib_ipv4.c | 23 | ||||
-rw-r--r-- | net/ipv6/netfilter/nft_fib_ipv6.c | 16 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_core.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_flow_table_ip.c | 3 | ||||
-rw-r--r-- | net/netfilter/nf_nat_helper.c | 2 | ||||
-rw-r--r-- | net/netfilter/nf_queue.c | 1 | ||||
-rw-r--r-- | net/netfilter/nf_tables_api.c | 20 | ||||
-rw-r--r-- | net/netfilter/nft_fib.c | 6 | ||||
-rw-r--r-- | net/netfilter/nft_flow_offload.c | 31 | ||||
-rw-r--r-- | tools/testing/selftests/netfilter/Makefile | 2 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/nft_flowtable.sh | 324 | ||||
-rwxr-xr-x | tools/testing/selftests/netfilter/nft_nat.sh | 6 |
13 files changed, 375 insertions, 63 deletions
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index a88f92737308..e4c4d8eaca8c 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h | |||
@@ -34,5 +34,5 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, | |||
34 | const struct nft_pktinfo *pkt); | 34 | const struct nft_pktinfo *pkt); |
35 | 35 | ||
36 | void nft_fib_store_result(void *reg, const struct nft_fib *priv, | 36 | void nft_fib_store_result(void *reg, const struct nft_fib *priv, |
37 | const struct nft_pktinfo *pkt, int index); | 37 | const struct net_device *dev); |
38 | #endif | 38 | #endif |
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 94eb25bc8d7e..c8888e52591f 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c | |||
@@ -58,11 +58,6 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, | |||
58 | } | 58 | } |
59 | EXPORT_SYMBOL_GPL(nft_fib4_eval_type); | 59 | EXPORT_SYMBOL_GPL(nft_fib4_eval_type); |
60 | 60 | ||
61 | static int get_ifindex(const struct net_device *dev) | ||
62 | { | ||
63 | return dev ? dev->ifindex : 0; | ||
64 | } | ||
65 | |||
66 | void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, | 61 | void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, |
67 | const struct nft_pktinfo *pkt) | 62 | const struct nft_pktinfo *pkt) |
68 | { | 63 | { |
@@ -94,8 +89,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, | |||
94 | 89 | ||
95 | if (nft_hook(pkt) == NF_INET_PRE_ROUTING && | 90 | if (nft_hook(pkt) == NF_INET_PRE_ROUTING && |
96 | nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { | 91 | nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { |
97 | nft_fib_store_result(dest, priv, pkt, | 92 | nft_fib_store_result(dest, priv, nft_in(pkt)); |
98 | nft_in(pkt)->ifindex); | ||
99 | return; | 93 | return; |
100 | } | 94 | } |
101 | 95 | ||
@@ -108,8 +102,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, | |||
108 | if (ipv4_is_zeronet(iph->saddr)) { | 102 | if (ipv4_is_zeronet(iph->saddr)) { |
109 | if (ipv4_is_lbcast(iph->daddr) || | 103 | if (ipv4_is_lbcast(iph->daddr) || |
110 | ipv4_is_local_multicast(iph->daddr)) { | 104 | ipv4_is_local_multicast(iph->daddr)) { |
111 | nft_fib_store_result(dest, priv, pkt, | 105 | nft_fib_store_result(dest, priv, pkt->skb->dev); |
112 | get_ifindex(pkt->skb->dev)); | ||
113 | return; | 106 | return; |
114 | } | 107 | } |
115 | } | 108 | } |
@@ -150,17 +143,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, | |||
150 | found = oif; | 143 | found = oif; |
151 | } | 144 | } |
152 | 145 | ||
153 | switch (priv->result) { | 146 | nft_fib_store_result(dest, priv, found); |
154 | case NFT_FIB_RESULT_OIF: | ||
155 | *dest = found->ifindex; | ||
156 | break; | ||
157 | case NFT_FIB_RESULT_OIFNAME: | ||
158 | strncpy((char *)dest, found->name, IFNAMSIZ); | ||
159 | break; | ||
160 | default: | ||
161 | WARN_ON_ONCE(1); | ||
162 | break; | ||
163 | } | ||
164 | } | 147 | } |
165 | EXPORT_SYMBOL_GPL(nft_fib4_eval); | 148 | EXPORT_SYMBOL_GPL(nft_fib4_eval); |
166 | 149 | ||
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 73cdc0bc63f7..ec068b0cffca 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c | |||
@@ -169,8 +169,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, | |||
169 | 169 | ||
170 | if (nft_hook(pkt) == NF_INET_PRE_ROUTING && | 170 | if (nft_hook(pkt) == NF_INET_PRE_ROUTING && |
171 | nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { | 171 | nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { |
172 | nft_fib_store_result(dest, priv, pkt, | 172 | nft_fib_store_result(dest, priv, nft_in(pkt)); |
173 | nft_in(pkt)->ifindex); | ||
174 | return; | 173 | return; |
175 | } | 174 | } |
176 | 175 | ||
@@ -187,18 +186,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, | |||
187 | if (oif && oif != rt->rt6i_idev->dev) | 186 | if (oif && oif != rt->rt6i_idev->dev) |
188 | goto put_rt_err; | 187 | goto put_rt_err; |
189 | 188 | ||
190 | switch (priv->result) { | 189 | nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); |
191 | case NFT_FIB_RESULT_OIF: | ||
192 | *dest = rt->rt6i_idev->dev->ifindex; | ||
193 | break; | ||
194 | case NFT_FIB_RESULT_OIFNAME: | ||
195 | strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ); | ||
196 | break; | ||
197 | default: | ||
198 | WARN_ON_ONCE(1); | ||
199 | break; | ||
200 | } | ||
201 | |||
202 | put_rt_err: | 190 | put_rt_err: |
203 | ip6_rt_put(rt); | 191 | ip6_rt_put(rt); |
204 | } | 192 | } |
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 14457551bcb4..8ebf21149ec3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c | |||
@@ -2312,7 +2312,6 @@ static void __net_exit __ip_vs_cleanup(struct net *net) | |||
2312 | { | 2312 | { |
2313 | struct netns_ipvs *ipvs = net_ipvs(net); | 2313 | struct netns_ipvs *ipvs = net_ipvs(net); |
2314 | 2314 | ||
2315 | nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | ||
2316 | ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ | 2315 | ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ |
2317 | ip_vs_conn_net_cleanup(ipvs); | 2316 | ip_vs_conn_net_cleanup(ipvs); |
2318 | ip_vs_app_net_cleanup(ipvs); | 2317 | ip_vs_app_net_cleanup(ipvs); |
@@ -2327,6 +2326,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net) | |||
2327 | { | 2326 | { |
2328 | struct netns_ipvs *ipvs = net_ipvs(net); | 2327 | struct netns_ipvs *ipvs = net_ipvs(net); |
2329 | EnterFunction(2); | 2328 | EnterFunction(2); |
2329 | nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); | ||
2330 | ipvs->enable = 0; /* Disable packet reception */ | 2330 | ipvs->enable = 0; /* Disable packet reception */ |
2331 | smp_wmb(); | 2331 | smp_wmb(); |
2332 | ip_vs_sync_net_cleanup(ipvs); | 2332 | ip_vs_sync_net_cleanup(ipvs); |
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 96825e20368f..241317473114 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c | |||
@@ -244,8 +244,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb, | |||
244 | rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; | 244 | rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; |
245 | outdev = rt->dst.dev; | 245 | outdev = rt->dst.dev; |
246 | 246 | ||
247 | if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && | 247 | if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) |
248 | (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) | ||
249 | return NF_ACCEPT; | 248 | return NF_ACCEPT; |
250 | 249 | ||
251 | if (skb_try_make_writable(skb, sizeof(*iph))) | 250 | if (skb_try_make_writable(skb, sizeof(*iph))) |
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index ccc06f7539d7..53aeb12b70fb 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c | |||
@@ -170,7 +170,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, | |||
170 | if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) | 170 | if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) |
171 | return true; | 171 | return true; |
172 | 172 | ||
173 | nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP, | 173 | nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_UDP, |
174 | udph, &udph->check, datalen, oldlen); | 174 | udph, &udph->check, datalen, oldlen); |
175 | 175 | ||
176 | return true; | 176 | return true; |
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 9dc1d6e04946..b5b2be55ca82 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c | |||
@@ -255,6 +255,7 @@ static unsigned int nf_iterate(struct sk_buff *skb, | |||
255 | repeat: | 255 | repeat: |
256 | verdict = nf_hook_entry_hookfn(hook, skb, state); | 256 | verdict = nf_hook_entry_hookfn(hook, skb, state); |
257 | if (verdict != NF_ACCEPT) { | 257 | if (verdict != NF_ACCEPT) { |
258 | *index = i; | ||
258 | if (verdict != NF_REPEAT) | 259 | if (verdict != NF_REPEAT) |
259 | return verdict; | 260 | return verdict; |
260 | goto repeat; | 261 | goto repeat; |
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 28241e82fd15..4b5159936034 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c | |||
@@ -2270,13 +2270,13 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, | |||
2270 | u32 flags, int family, | 2270 | u32 flags, int family, |
2271 | const struct nft_table *table, | 2271 | const struct nft_table *table, |
2272 | const struct nft_chain *chain, | 2272 | const struct nft_chain *chain, |
2273 | const struct nft_rule *rule) | 2273 | const struct nft_rule *rule, |
2274 | const struct nft_rule *prule) | ||
2274 | { | 2275 | { |
2275 | struct nlmsghdr *nlh; | 2276 | struct nlmsghdr *nlh; |
2276 | struct nfgenmsg *nfmsg; | 2277 | struct nfgenmsg *nfmsg; |
2277 | const struct nft_expr *expr, *next; | 2278 | const struct nft_expr *expr, *next; |
2278 | struct nlattr *list; | 2279 | struct nlattr *list; |
2279 | const struct nft_rule *prule; | ||
2280 | u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); | 2280 | u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); |
2281 | 2281 | ||
2282 | nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); | 2282 | nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); |
@@ -2296,8 +2296,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, | |||
2296 | NFTA_RULE_PAD)) | 2296 | NFTA_RULE_PAD)) |
2297 | goto nla_put_failure; | 2297 | goto nla_put_failure; |
2298 | 2298 | ||
2299 | if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { | 2299 | if (event != NFT_MSG_DELRULE && prule) { |
2300 | prule = list_prev_entry(rule, list); | ||
2301 | if (nla_put_be64(skb, NFTA_RULE_POSITION, | 2300 | if (nla_put_be64(skb, NFTA_RULE_POSITION, |
2302 | cpu_to_be64(prule->handle), | 2301 | cpu_to_be64(prule->handle), |
2303 | NFTA_RULE_PAD)) | 2302 | NFTA_RULE_PAD)) |
@@ -2344,7 +2343,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, | |||
2344 | 2343 | ||
2345 | err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, | 2344 | err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, |
2346 | event, 0, ctx->family, ctx->table, | 2345 | event, 0, ctx->family, ctx->table, |
2347 | ctx->chain, rule); | 2346 | ctx->chain, rule, NULL); |
2348 | if (err < 0) { | 2347 | if (err < 0) { |
2349 | kfree_skb(skb); | 2348 | kfree_skb(skb); |
2350 | goto err; | 2349 | goto err; |
@@ -2369,12 +2368,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, | |||
2369 | const struct nft_chain *chain) | 2368 | const struct nft_chain *chain) |
2370 | { | 2369 | { |
2371 | struct net *net = sock_net(skb->sk); | 2370 | struct net *net = sock_net(skb->sk); |
2371 | const struct nft_rule *rule, *prule; | ||
2372 | unsigned int s_idx = cb->args[0]; | 2372 | unsigned int s_idx = cb->args[0]; |
2373 | const struct nft_rule *rule; | ||
2374 | 2373 | ||
2374 | prule = NULL; | ||
2375 | list_for_each_entry_rcu(rule, &chain->rules, list) { | 2375 | list_for_each_entry_rcu(rule, &chain->rules, list) { |
2376 | if (!nft_is_active(net, rule)) | 2376 | if (!nft_is_active(net, rule)) |
2377 | goto cont; | 2377 | goto cont_skip; |
2378 | if (*idx < s_idx) | 2378 | if (*idx < s_idx) |
2379 | goto cont; | 2379 | goto cont; |
2380 | if (*idx > s_idx) { | 2380 | if (*idx > s_idx) { |
@@ -2386,11 +2386,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb, | |||
2386 | NFT_MSG_NEWRULE, | 2386 | NFT_MSG_NEWRULE, |
2387 | NLM_F_MULTI | NLM_F_APPEND, | 2387 | NLM_F_MULTI | NLM_F_APPEND, |
2388 | table->family, | 2388 | table->family, |
2389 | table, chain, rule) < 0) | 2389 | table, chain, rule, prule) < 0) |
2390 | return 1; | 2390 | return 1; |
2391 | 2391 | ||
2392 | nl_dump_check_consistent(cb, nlmsg_hdr(skb)); | 2392 | nl_dump_check_consistent(cb, nlmsg_hdr(skb)); |
2393 | cont: | 2393 | cont: |
2394 | prule = rule; | ||
2395 | cont_skip: | ||
2394 | (*idx)++; | 2396 | (*idx)++; |
2395 | } | 2397 | } |
2396 | return 0; | 2398 | return 0; |
@@ -2546,7 +2548,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, | |||
2546 | 2548 | ||
2547 | err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, | 2549 | err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, |
2548 | nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, | 2550 | nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, |
2549 | family, table, chain, rule); | 2551 | family, table, chain, rule, NULL); |
2550 | if (err < 0) | 2552 | if (err < 0) |
2551 | goto err; | 2553 | goto err; |
2552 | 2554 | ||
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 21df8cccea65..77f00a99dfab 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c | |||
@@ -135,17 +135,17 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr) | |||
135 | EXPORT_SYMBOL_GPL(nft_fib_dump); | 135 | EXPORT_SYMBOL_GPL(nft_fib_dump); |
136 | 136 | ||
137 | void nft_fib_store_result(void *reg, const struct nft_fib *priv, | 137 | void nft_fib_store_result(void *reg, const struct nft_fib *priv, |
138 | const struct nft_pktinfo *pkt, int index) | 138 | const struct net_device *dev) |
139 | { | 139 | { |
140 | struct net_device *dev; | ||
141 | u32 *dreg = reg; | 140 | u32 *dreg = reg; |
141 | int index; | ||
142 | 142 | ||
143 | switch (priv->result) { | 143 | switch (priv->result) { |
144 | case NFT_FIB_RESULT_OIF: | 144 | case NFT_FIB_RESULT_OIF: |
145 | index = dev ? dev->ifindex : 0; | ||
145 | *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; | 146 | *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; |
146 | break; | 147 | break; |
147 | case NFT_FIB_RESULT_OIFNAME: | 148 | case NFT_FIB_RESULT_OIFNAME: |
148 | dev = dev_get_by_index_rcu(nft_net(pkt), index); | ||
149 | if (priv->flags & NFTA_FIB_F_PRESENT) | 149 | if (priv->flags & NFTA_FIB_F_PRESENT) |
150 | *dreg = !!dev; | 150 | *dreg = !!dev; |
151 | else | 151 | else |
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index ffb25d5e8dbe..aa5f571d4361 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <net/netfilter/nf_conntrack_core.h> | 13 | #include <net/netfilter/nf_conntrack_core.h> |
14 | #include <linux/netfilter/nf_conntrack_common.h> | 14 | #include <linux/netfilter/nf_conntrack_common.h> |
15 | #include <net/netfilter/nf_flow_table.h> | 15 | #include <net/netfilter/nf_flow_table.h> |
16 | #include <net/netfilter/nf_conntrack_helper.h> | ||
17 | 16 | ||
18 | struct nft_flow_offload { | 17 | struct nft_flow_offload { |
19 | struct nft_flowtable *flowtable; | 18 | struct nft_flowtable *flowtable; |
@@ -50,15 +49,20 @@ static int nft_flow_route(const struct nft_pktinfo *pkt, | |||
50 | return 0; | 49 | return 0; |
51 | } | 50 | } |
52 | 51 | ||
53 | static bool nft_flow_offload_skip(struct sk_buff *skb) | 52 | static bool nft_flow_offload_skip(struct sk_buff *skb, int family) |
54 | { | 53 | { |
55 | struct ip_options *opt = &(IPCB(skb)->opt); | ||
56 | |||
57 | if (unlikely(opt->optlen)) | ||
58 | return true; | ||
59 | if (skb_sec_path(skb)) | 54 | if (skb_sec_path(skb)) |
60 | return true; | 55 | return true; |
61 | 56 | ||
57 | if (family == NFPROTO_IPV4) { | ||
58 | const struct ip_options *opt; | ||
59 | |||
60 | opt = &(IPCB(skb)->opt); | ||
61 | |||
62 | if (unlikely(opt->optlen)) | ||
63 | return true; | ||
64 | } | ||
65 | |||
62 | return false; | 66 | return false; |
63 | } | 67 | } |
64 | 68 | ||
@@ -68,15 +72,15 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, | |||
68 | { | 72 | { |
69 | struct nft_flow_offload *priv = nft_expr_priv(expr); | 73 | struct nft_flow_offload *priv = nft_expr_priv(expr); |
70 | struct nf_flowtable *flowtable = &priv->flowtable->data; | 74 | struct nf_flowtable *flowtable = &priv->flowtable->data; |
71 | const struct nf_conn_help *help; | ||
72 | enum ip_conntrack_info ctinfo; | 75 | enum ip_conntrack_info ctinfo; |
73 | struct nf_flow_route route; | 76 | struct nf_flow_route route; |
74 | struct flow_offload *flow; | 77 | struct flow_offload *flow; |
75 | enum ip_conntrack_dir dir; | 78 | enum ip_conntrack_dir dir; |
79 | bool is_tcp = false; | ||
76 | struct nf_conn *ct; | 80 | struct nf_conn *ct; |
77 | int ret; | 81 | int ret; |
78 | 82 | ||
79 | if (nft_flow_offload_skip(pkt->skb)) | 83 | if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt))) |
80 | goto out; | 84 | goto out; |
81 | 85 | ||
82 | ct = nf_ct_get(pkt->skb, &ctinfo); | 86 | ct = nf_ct_get(pkt->skb, &ctinfo); |
@@ -85,14 +89,16 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, | |||
85 | 89 | ||
86 | switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { | 90 | switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { |
87 | case IPPROTO_TCP: | 91 | case IPPROTO_TCP: |
92 | is_tcp = true; | ||
93 | break; | ||
88 | case IPPROTO_UDP: | 94 | case IPPROTO_UDP: |
89 | break; | 95 | break; |
90 | default: | 96 | default: |
91 | goto out; | 97 | goto out; |
92 | } | 98 | } |
93 | 99 | ||
94 | help = nfct_help(ct); | 100 | if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || |
95 | if (help) | 101 | ct->status & IPS_SEQ_ADJUST) |
96 | goto out; | 102 | goto out; |
97 | 103 | ||
98 | if (!nf_ct_is_confirmed(ct)) | 104 | if (!nf_ct_is_confirmed(ct)) |
@@ -109,6 +115,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr, | |||
109 | if (!flow) | 115 | if (!flow) |
110 | goto err_flow_alloc; | 116 | goto err_flow_alloc; |
111 | 117 | ||
118 | if (is_tcp) { | ||
119 | ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; | ||
120 | ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; | ||
121 | } | ||
122 | |||
112 | ret = flow_offload_add(flowtable, flow); | 123 | ret = flow_offload_add(flowtable, flow); |
113 | if (ret < 0) | 124 | if (ret < 0) |
114 | goto err_flow_add; | 125 | goto err_flow_add; |
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3e6d1bcc2894..4144984ebee5 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile | |||
@@ -2,6 +2,6 @@ | |||
2 | # Makefile for netfilter selftests | 2 | # Makefile for netfilter selftests |
3 | 3 | ||
4 | TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ | 4 | TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ |
5 | conntrack_icmp_related.sh | 5 | conntrack_icmp_related.sh nft_flowtable.sh |
6 | 6 | ||
7 | include ../lib.mk | 7 | include ../lib.mk |
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh new file mode 100755 index 000000000000..fe52488a6f72 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh | |||
@@ -0,0 +1,324 @@ | |||
1 | #!/bin/bash | ||
2 | # SPDX-License-Identifier: GPL-2.0 | ||
3 | # | ||
4 | # This tests basic flowtable functionality. | ||
5 | # Creates following topology: | ||
6 | # | ||
7 | # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000) | ||
8 | # Router1 is the one doing flow offloading, Router2 has no special | ||
9 | # purpose other than having a link whose MTU is smaller than that of both | ||
10 | # Originator and Responder, i.e. TCPMSS announced values are too large and will still | ||
11 | # result in fragmentation and/or PMTU discovery. | ||
12 | |||
13 | # Kselftest framework requirement - SKIP code is 4. | ||
14 | ksft_skip=4 | ||
15 | ret=0 | ||
16 | |||
17 | ns1in="" | ||
18 | ns2in="" | ||
19 | ns1out="" | ||
20 | ns2out="" | ||
21 | |||
22 | log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) | ||
23 | |||
24 | nft --version > /dev/null 2>&1 | ||
25 | if [ $? -ne 0 ];then | ||
26 | echo "SKIP: Could not run test without nft tool" | ||
27 | exit $ksft_skip | ||
28 | fi | ||
29 | |||
30 | ip -Version > /dev/null 2>&1 | ||
31 | if [ $? -ne 0 ];then | ||
32 | echo "SKIP: Could not run test without ip tool" | ||
33 | exit $ksft_skip | ||
34 | fi | ||
35 | |||
36 | which nc > /dev/null 2>&1 | ||
37 | if [ $? -ne 0 ];then | ||
38 | echo "SKIP: Could not run test without nc (netcat)" | ||
39 | exit $ksft_skip | ||
40 | fi | ||
41 | |||
42 | ip netns add nsr1 | ||
43 | if [ $? -ne 0 ];then | ||
44 | echo "SKIP: Could not create net namespace" | ||
45 | exit $ksft_skip | ||
46 | fi | ||
47 | |||
48 | ip netns add ns1 | ||
49 | ip netns add ns2 | ||
50 | |||
51 | ip netns add nsr2 | ||
52 | |||
53 | cleanup() { | ||
54 | for i in 1 2; do | ||
55 | ip netns del ns$i | ||
56 | ip netns del nsr$i | ||
57 | done | ||
58 | |||
59 | rm -f "$ns1in" "$ns1out" | ||
60 | rm -f "$ns2in" "$ns2out" | ||
61 | |||
62 | [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns | ||
63 | } | ||
64 | |||
65 | trap cleanup EXIT | ||
66 | |||
67 | sysctl -q net.netfilter.nf_log_all_netns=1 | ||
68 | |||
69 | ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1 | ||
70 | ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2 | ||
71 | |||
72 | ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2 | ||
73 | |||
74 | for dev in lo veth0 veth1; do | ||
75 | for i in 1 2; do | ||
76 | ip -net nsr$i link set $dev up | ||
77 | done | ||
78 | done | ||
79 | |||
80 | ip -net nsr1 addr add 10.0.1.1/24 dev veth0 | ||
81 | ip -net nsr1 addr add dead:1::1/64 dev veth0 | ||
82 | |||
83 | ip -net nsr2 addr add 10.0.2.1/24 dev veth1 | ||
84 | ip -net nsr2 addr add dead:2::1/64 dev veth1 | ||
85 | |||
86 | # set different MTUs so we need to push packets coming from ns1 (large MTU) | ||
87 | # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), | ||
88 | # or to do PMTU discovery (send ICMP error back to originator). | ||
89 | # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers | ||
90 | # is NOT the lowest link mtu. | ||
91 | |||
92 | ip -net nsr1 link set veth0 mtu 9000 | ||
93 | ip -net ns1 link set eth0 mtu 9000 | ||
94 | |||
95 | ip -net nsr2 link set veth1 mtu 2000 | ||
96 | ip -net ns2 link set eth0 mtu 2000 | ||
97 | |||
98 | # transfer-net between nsr1 and nsr2. | ||
99 | # these addresses are not used for connections. | ||
100 | ip -net nsr1 addr add 192.168.10.1/24 dev veth1 | ||
101 | ip -net nsr1 addr add fee1:2::1/64 dev veth1 | ||
102 | |||
103 | ip -net nsr2 addr add 192.168.10.2/24 dev veth0 | ||
104 | ip -net nsr2 addr add fee1:2::2/64 dev veth0 | ||
105 | |||
106 | for i in 1 2; do | ||
107 | ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null | ||
108 | ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null | ||
109 | |||
110 | ip -net ns$i link set lo up | ||
111 | ip -net ns$i link set eth0 up | ||
112 | ip -net ns$i addr add 10.0.$i.99/24 dev eth0 | ||
113 | ip -net ns$i route add default via 10.0.$i.1 | ||
114 | ip -net ns$i addr add dead:$i::99/64 dev eth0 | ||
115 | ip -net ns$i route add default via dead:$i::1 | ||
116 | ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null | ||
117 | |||
118 | # don't set ip DF bit for first two tests | ||
119 | ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null | ||
120 | done | ||
121 | |||
122 | ip -net nsr1 route add default via 192.168.10.2 | ||
123 | ip -net nsr2 route add default via 192.168.10.1 | ||
124 | |||
125 | ip netns exec nsr1 nft -f - <<EOF | ||
126 | table inet filter { | ||
127 | flowtable f1 { | ||
128 | hook ingress priority 0 | ||
129 | devices = { veth0, veth1 } | ||
130 | } | ||
131 | |||
132 | chain forward { | ||
133 | type filter hook forward priority 0; policy drop; | ||
134 | |||
135 | # flow offloaded? Tag ct with mark 1, so we can detect when it fails. | ||
136 | meta oif "veth1" tcp dport 12345 flow offload @f1 counter | ||
137 | |||
138 | # use packet size to trigger 'should be offloaded by now'. | ||
139 | # otherwise, if 'flow offload' expression never offloads, the | ||
140 | # test will pass. | ||
141 | tcp dport 12345 meta length gt 200 ct mark set 1 counter | ||
142 | |||
143 | # this turns off flow offloading internally, so expect packets again | ||
144 | tcp flags fin,rst ct mark set 0 accept | ||
145 | |||
146 | # this allows large packets from responder, we need this as long | ||
147 | # as PMTUd is off. | ||
148 | # This rule is deleted for the last test, when we expect PMTUd | ||
149 | # to kick in and ensure all packets meet mtu requirements. | ||
150 | meta length gt 1500 accept comment something-to-grep-for | ||
151 | |||
152 | # next line blocks connection w.o. working offload. | ||
153 | # we only do this for reverse dir, because we expect packets to | ||
154 | # enter slow path due to MTU mismatch of veth0 and veth1. | ||
155 | tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop | ||
156 | |||
157 | ct state established,related accept | ||
158 | |||
159 | # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed) | ||
160 | meta length lt 200 oif "veth1" tcp dport 12345 counter accept | ||
161 | |||
162 | meta nfproto ipv4 meta l4proto icmp accept | ||
163 | meta nfproto ipv6 meta l4proto icmpv6 accept | ||
164 | } | ||
165 | } | ||
166 | EOF | ||
167 | |||
168 | if [ $? -ne 0 ]; then | ||
169 | echo "SKIP: Could not load nft ruleset" | ||
170 | exit $ksft_skip | ||
171 | fi | ||
172 | |||
173 | # test basic connectivity | ||
174 | ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null | ||
175 | if [ $? -ne 0 ];then | ||
176 | echo "ERROR: ns1 cannot reach ns2" 1>&2 | ||
177 | bash | ||
178 | exit 1 | ||
179 | fi | ||
180 | |||
181 | ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null | ||
182 | if [ $? -ne 0 ];then | ||
183 | echo "ERROR: ns2 cannot reach ns1" 1>&2 | ||
184 | exit 1 | ||
185 | fi | ||
186 | |||
187 | if [ $ret -eq 0 ];then | ||
188 | echo "PASS: netns routing/connectivity: ns1 can reach ns2" | ||
189 | fi | ||
190 | |||
191 | ns1in=$(mktemp) | ||
192 | ns1out=$(mktemp) | ||
193 | ns2in=$(mktemp) | ||
194 | ns2out=$(mktemp) | ||
195 | |||
196 | make_file() | ||
197 | { | ||
198 | name=$1 | ||
199 | who=$2 | ||
200 | |||
201 | SIZE=$((RANDOM % (1024 * 8))) | ||
202 | TSIZE=$((SIZE * 1024)) | ||
203 | |||
204 | dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null | ||
205 | |||
206 | SIZE=$((RANDOM % 1024)) | ||
207 | SIZE=$((SIZE + 128)) | ||
208 | TSIZE=$((TSIZE + SIZE)) | ||
209 | dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null | ||
210 | } | ||
211 | |||
212 | check_transfer() | ||
213 | { | ||
214 | in=$1 | ||
215 | out=$2 | ||
216 | what=$3 | ||
217 | |||
218 | cmp "$in" "$out" > /dev/null 2>&1 | ||
219 | if [ $? -ne 0 ] ;then | ||
220 | echo "FAIL: file mismatch for $what" 1>&2 | ||
221 | ls -l "$in" | ||
222 | ls -l "$out" | ||
223 | return 1 | ||
224 | fi | ||
225 | |||
226 | return 0 | ||
227 | } | ||
228 | |||
229 | test_tcp_forwarding() | ||
230 | { | ||
231 | local nsa=$1 | ||
232 | local nsb=$2 | ||
233 | local lret=0 | ||
234 | |||
235 | ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" & | ||
236 | lpid=$! | ||
237 | |||
238 | sleep 1 | ||
239 | ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" & | ||
240 | cpid=$! | ||
241 | |||
242 | sleep 3 | ||
243 | |||
244 | kill $lpid | ||
245 | kill $cpid | ||
246 | wait | ||
247 | |||
248 | check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" | ||
249 | if [ $? -ne 0 ];then | ||
250 | lret=1 | ||
251 | fi | ||
252 | |||
253 | check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" | ||
254 | if [ $? -ne 0 ];then | ||
255 | lret=1 | ||
256 | fi | ||
257 | |||
258 | return $lret | ||
259 | } | ||
260 | |||
261 | make_file "$ns1in" "ns1" | ||
262 | make_file "$ns2in" "ns2" | ||
263 | |||
264 | # First test: | ||
265 | # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. | ||
266 | test_tcp_forwarding ns1 ns2 | ||
267 | if [ $? -eq 0 ] ;then | ||
268 | echo "PASS: flow offloaded for ns1/ns2" | ||
269 | else | ||
270 | echo "FAIL: flow offload for ns1/ns2:" 1>&2 | ||
271 | ip netns exec nsr1 nft list ruleset | ||
272 | ret=1 | ||
273 | fi | ||
274 | |||
275 | # delete default route, i.e. ns2 won't be able to reach ns1 and | ||
276 | # will depend on ns1 being masqueraded in nsr1. | ||
277 | # expect ns1 to be seen with nsr1's address. | ||
278 | ip -net ns2 route del default via 10.0.2.1 | ||
279 | ip -net ns2 route del default via dead:2::1 | ||
280 | ip -net ns2 route add 192.168.10.1 via 10.0.2.1 | ||
281 | |||
282 | # Second test: | ||
283 | # Same, but with NAT enabled. | ||
284 | ip netns exec nsr1 nft -f - <<EOF | ||
285 | table ip nat { | ||
286 | chain postrouting { | ||
287 | type nat hook postrouting priority 0; policy accept; | ||
288 | meta oifname "veth1" masquerade | ||
289 | } | ||
290 | } | ||
291 | EOF | ||
292 | |||
293 | test_tcp_forwarding ns1 ns2 | ||
294 | |||
295 | if [ $? -eq 0 ] ;then | ||
296 | echo "PASS: flow offloaded for ns1/ns2 with NAT" | ||
297 | else | ||
298 | echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 | ||
299 | ip netns exec nsr1 nft list ruleset | ||
300 | ret=1 | ||
301 | fi | ||
302 | |||
303 | # Third test: | ||
304 | # Same as second test, but with PMTU discovery enabled. | ||
305 | handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) | ||
306 | |||
307 | ip netns exec nsr1 nft delete rule inet filter forward $handle | ||
308 | if [ $? -ne 0 ] ;then | ||
309 | echo "FAIL: Could not delete large-packet accept rule" | ||
310 | exit 1 | ||
311 | fi | ||
312 | |||
313 | ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null | ||
314 | ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null | ||
315 | |||
316 | test_tcp_forwarding ns1 ns2 | ||
317 | if [ $? -eq 0 ] ;then | ||
318 | echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" | ||
319 | else | ||
320 | echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 | ||
321 | ip netns exec nsr1 nft list ruleset | ||
322 | fi | ||
323 | |||
324 | exit $ret | ||
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 14fcf3104c77..1be55e705780 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh | |||
@@ -36,7 +36,11 @@ trap cleanup EXIT | |||
36 | ip netns add ns1 | 36 | ip netns add ns1 |
37 | ip netns add ns2 | 37 | ip netns add ns2 |
38 | 38 | ||
39 | ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 | 39 | ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1 |
40 | if [ $? -ne 0 ];then | ||
41 | echo "SKIP: No virtual ethernet pair device support in kernel" | ||
42 | exit $ksft_skip | ||
43 | fi | ||
40 | ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 | 44 | ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 |
41 | 45 | ||
42 | ip -net ns0 link set lo up | 46 | ip -net ns0 link set lo up |