aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- include/net/netfilter/nft_fib.h 2
-rw-r--r-- net/ipv4/netfilter/nft_fib_ipv4.c 23
-rw-r--r-- net/ipv6/netfilter/nft_fib_ipv6.c 16
-rw-r--r-- net/netfilter/ipvs/ip_vs_core.c 2
-rw-r--r-- net/netfilter/nf_flow_table_ip.c 3
-rw-r--r-- net/netfilter/nf_nat_helper.c 2
-rw-r--r-- net/netfilter/nf_queue.c 1
-rw-r--r-- net/netfilter/nf_tables_api.c 20
-rw-r--r-- net/netfilter/nft_fib.c 6
-rw-r--r-- net/netfilter/nft_flow_offload.c 31
-rw-r--r-- tools/testing/selftests/netfilter/Makefile 2
-rwxr-xr-x tools/testing/selftests/netfilter/nft_flowtable.sh 324
-rwxr-xr-x tools/testing/selftests/netfilter/nft_nat.sh 6
13 files changed, 375 insertions, 63 deletions
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index a88f92737308..e4c4d8eaca8c 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -34,5 +34,5 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
34 const struct nft_pktinfo *pkt); 34 const struct nft_pktinfo *pkt);
35 35
36void nft_fib_store_result(void *reg, const struct nft_fib *priv, 36void nft_fib_store_result(void *reg, const struct nft_fib *priv,
37 const struct nft_pktinfo *pkt, int index); 37 const struct net_device *dev);
38#endif 38#endif
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 94eb25bc8d7e..c8888e52591f 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -58,11 +58,6 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
58} 58}
59EXPORT_SYMBOL_GPL(nft_fib4_eval_type); 59EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
60 60
61static int get_ifindex(const struct net_device *dev)
62{
63 return dev ? dev->ifindex : 0;
64}
65
66void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, 61void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
67 const struct nft_pktinfo *pkt) 62 const struct nft_pktinfo *pkt)
68{ 63{
@@ -94,8 +89,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
94 89
95 if (nft_hook(pkt) == NF_INET_PRE_ROUTING && 90 if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
96 nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { 91 nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
97 nft_fib_store_result(dest, priv, pkt, 92 nft_fib_store_result(dest, priv, nft_in(pkt));
98 nft_in(pkt)->ifindex);
99 return; 93 return;
100 } 94 }
101 95
@@ -108,8 +102,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
108 if (ipv4_is_zeronet(iph->saddr)) { 102 if (ipv4_is_zeronet(iph->saddr)) {
109 if (ipv4_is_lbcast(iph->daddr) || 103 if (ipv4_is_lbcast(iph->daddr) ||
110 ipv4_is_local_multicast(iph->daddr)) { 104 ipv4_is_local_multicast(iph->daddr)) {
111 nft_fib_store_result(dest, priv, pkt, 105 nft_fib_store_result(dest, priv, pkt->skb->dev);
112 get_ifindex(pkt->skb->dev));
113 return; 106 return;
114 } 107 }
115 } 108 }
@@ -150,17 +143,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
150 found = oif; 143 found = oif;
151 } 144 }
152 145
153 switch (priv->result) { 146 nft_fib_store_result(dest, priv, found);
154 case NFT_FIB_RESULT_OIF:
155 *dest = found->ifindex;
156 break;
157 case NFT_FIB_RESULT_OIFNAME:
158 strncpy((char *)dest, found->name, IFNAMSIZ);
159 break;
160 default:
161 WARN_ON_ONCE(1);
162 break;
163 }
164} 147}
165EXPORT_SYMBOL_GPL(nft_fib4_eval); 148EXPORT_SYMBOL_GPL(nft_fib4_eval);
166 149
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 73cdc0bc63f7..ec068b0cffca 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -169,8 +169,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
169 169
170 if (nft_hook(pkt) == NF_INET_PRE_ROUTING && 170 if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
171 nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { 171 nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
172 nft_fib_store_result(dest, priv, pkt, 172 nft_fib_store_result(dest, priv, nft_in(pkt));
173 nft_in(pkt)->ifindex);
174 return; 173 return;
175 } 174 }
176 175
@@ -187,18 +186,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
187 if (oif && oif != rt->rt6i_idev->dev) 186 if (oif && oif != rt->rt6i_idev->dev)
188 goto put_rt_err; 187 goto put_rt_err;
189 188
190 switch (priv->result) { 189 nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
191 case NFT_FIB_RESULT_OIF:
192 *dest = rt->rt6i_idev->dev->ifindex;
193 break;
194 case NFT_FIB_RESULT_OIFNAME:
195 strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ);
196 break;
197 default:
198 WARN_ON_ONCE(1);
199 break;
200 }
201
202 put_rt_err: 190 put_rt_err:
203 ip6_rt_put(rt); 191 ip6_rt_put(rt);
204} 192}
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 14457551bcb4..8ebf21149ec3 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2312,7 +2312,6 @@ static void __net_exit __ip_vs_cleanup(struct net *net)
2312{ 2312{
2313 struct netns_ipvs *ipvs = net_ipvs(net); 2313 struct netns_ipvs *ipvs = net_ipvs(net);
2314 2314
2315 nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
2316 ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ 2315 ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
2317 ip_vs_conn_net_cleanup(ipvs); 2316 ip_vs_conn_net_cleanup(ipvs);
2318 ip_vs_app_net_cleanup(ipvs); 2317 ip_vs_app_net_cleanup(ipvs);
@@ -2327,6 +2326,7 @@ static void __net_exit __ip_vs_dev_cleanup(struct net *net)
2327{ 2326{
2328 struct netns_ipvs *ipvs = net_ipvs(net); 2327 struct netns_ipvs *ipvs = net_ipvs(net);
2329 EnterFunction(2); 2328 EnterFunction(2);
2329 nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
2330 ipvs->enable = 0; /* Disable packet reception */ 2330 ipvs->enable = 0; /* Disable packet reception */
2331 smp_wmb(); 2331 smp_wmb();
2332 ip_vs_sync_net_cleanup(ipvs); 2332 ip_vs_sync_net_cleanup(ipvs);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 96825e20368f..241317473114 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -244,8 +244,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
244 rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; 244 rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
245 outdev = rt->dst.dev; 245 outdev = rt->dst.dev;
246 246
247 if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && 247 if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
248 (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
249 return NF_ACCEPT; 248 return NF_ACCEPT;
250 249
251 if (skb_try_make_writable(skb, sizeof(*iph))) 250 if (skb_try_make_writable(skb, sizeof(*iph)))
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index ccc06f7539d7..53aeb12b70fb 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -170,7 +170,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
170 if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) 170 if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
171 return true; 171 return true;
172 172
173 nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP, 173 nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_UDP,
174 udph, &udph->check, datalen, oldlen); 174 udph, &udph->check, datalen, oldlen);
175 175
176 return true; 176 return true;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 9dc1d6e04946..b5b2be55ca82 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -255,6 +255,7 @@ static unsigned int nf_iterate(struct sk_buff *skb,
255repeat: 255repeat:
256 verdict = nf_hook_entry_hookfn(hook, skb, state); 256 verdict = nf_hook_entry_hookfn(hook, skb, state);
257 if (verdict != NF_ACCEPT) { 257 if (verdict != NF_ACCEPT) {
258 *index = i;
258 if (verdict != NF_REPEAT) 259 if (verdict != NF_REPEAT)
259 return verdict; 260 return verdict;
260 goto repeat; 261 goto repeat;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 28241e82fd15..4b5159936034 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2270,13 +2270,13 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
2270 u32 flags, int family, 2270 u32 flags, int family,
2271 const struct nft_table *table, 2271 const struct nft_table *table,
2272 const struct nft_chain *chain, 2272 const struct nft_chain *chain,
2273 const struct nft_rule *rule) 2273 const struct nft_rule *rule,
2274 const struct nft_rule *prule)
2274{ 2275{
2275 struct nlmsghdr *nlh; 2276 struct nlmsghdr *nlh;
2276 struct nfgenmsg *nfmsg; 2277 struct nfgenmsg *nfmsg;
2277 const struct nft_expr *expr, *next; 2278 const struct nft_expr *expr, *next;
2278 struct nlattr *list; 2279 struct nlattr *list;
2279 const struct nft_rule *prule;
2280 u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); 2280 u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
2281 2281
2282 nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); 2282 nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
@@ -2296,8 +2296,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
2296 NFTA_RULE_PAD)) 2296 NFTA_RULE_PAD))
2297 goto nla_put_failure; 2297 goto nla_put_failure;
2298 2298
2299 if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { 2299 if (event != NFT_MSG_DELRULE && prule) {
2300 prule = list_prev_entry(rule, list);
2301 if (nla_put_be64(skb, NFTA_RULE_POSITION, 2300 if (nla_put_be64(skb, NFTA_RULE_POSITION,
2302 cpu_to_be64(prule->handle), 2301 cpu_to_be64(prule->handle),
2303 NFTA_RULE_PAD)) 2302 NFTA_RULE_PAD))
@@ -2344,7 +2343,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
2344 2343
2345 err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, 2344 err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
2346 event, 0, ctx->family, ctx->table, 2345 event, 0, ctx->family, ctx->table,
2347 ctx->chain, rule); 2346 ctx->chain, rule, NULL);
2348 if (err < 0) { 2347 if (err < 0) {
2349 kfree_skb(skb); 2348 kfree_skb(skb);
2350 goto err; 2349 goto err;
@@ -2369,12 +2368,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
2369 const struct nft_chain *chain) 2368 const struct nft_chain *chain)
2370{ 2369{
2371 struct net *net = sock_net(skb->sk); 2370 struct net *net = sock_net(skb->sk);
2371 const struct nft_rule *rule, *prule;
2372 unsigned int s_idx = cb->args[0]; 2372 unsigned int s_idx = cb->args[0];
2373 const struct nft_rule *rule;
2374 2373
2374 prule = NULL;
2375 list_for_each_entry_rcu(rule, &chain->rules, list) { 2375 list_for_each_entry_rcu(rule, &chain->rules, list) {
2376 if (!nft_is_active(net, rule)) 2376 if (!nft_is_active(net, rule))
2377 goto cont; 2377 goto cont_skip;
2378 if (*idx < s_idx) 2378 if (*idx < s_idx)
2379 goto cont; 2379 goto cont;
2380 if (*idx > s_idx) { 2380 if (*idx > s_idx) {
@@ -2386,11 +2386,13 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
2386 NFT_MSG_NEWRULE, 2386 NFT_MSG_NEWRULE,
2387 NLM_F_MULTI | NLM_F_APPEND, 2387 NLM_F_MULTI | NLM_F_APPEND,
2388 table->family, 2388 table->family,
2389 table, chain, rule) < 0) 2389 table, chain, rule, prule) < 0)
2390 return 1; 2390 return 1;
2391 2391
2392 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 2392 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2393cont: 2393cont:
2394 prule = rule;
2395cont_skip:
2394 (*idx)++; 2396 (*idx)++;
2395 } 2397 }
2396 return 0; 2398 return 0;
@@ -2546,7 +2548,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
2546 2548
2547 err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, 2549 err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
2548 nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, 2550 nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
2549 family, table, chain, rule); 2551 family, table, chain, rule, NULL);
2550 if (err < 0) 2552 if (err < 0)
2551 goto err; 2553 goto err;
2552 2554
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 21df8cccea65..77f00a99dfab 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -135,17 +135,17 @@ int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr)
135EXPORT_SYMBOL_GPL(nft_fib_dump); 135EXPORT_SYMBOL_GPL(nft_fib_dump);
136 136
137void nft_fib_store_result(void *reg, const struct nft_fib *priv, 137void nft_fib_store_result(void *reg, const struct nft_fib *priv,
138 const struct nft_pktinfo *pkt, int index) 138 const struct net_device *dev)
139{ 139{
140 struct net_device *dev;
141 u32 *dreg = reg; 140 u32 *dreg = reg;
141 int index;
142 142
143 switch (priv->result) { 143 switch (priv->result) {
144 case NFT_FIB_RESULT_OIF: 144 case NFT_FIB_RESULT_OIF:
145 index = dev ? dev->ifindex : 0;
145 *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; 146 *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index;
146 break; 147 break;
147 case NFT_FIB_RESULT_OIFNAME: 148 case NFT_FIB_RESULT_OIFNAME:
148 dev = dev_get_by_index_rcu(nft_net(pkt), index);
149 if (priv->flags & NFTA_FIB_F_PRESENT) 149 if (priv->flags & NFTA_FIB_F_PRESENT)
150 *dreg = !!dev; 150 *dreg = !!dev;
151 else 151 else
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ffb25d5e8dbe..aa5f571d4361 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -13,7 +13,6 @@
13#include <net/netfilter/nf_conntrack_core.h> 13#include <net/netfilter/nf_conntrack_core.h>
14#include <linux/netfilter/nf_conntrack_common.h> 14#include <linux/netfilter/nf_conntrack_common.h>
15#include <net/netfilter/nf_flow_table.h> 15#include <net/netfilter/nf_flow_table.h>
16#include <net/netfilter/nf_conntrack_helper.h>
17 16
18struct nft_flow_offload { 17struct nft_flow_offload {
19 struct nft_flowtable *flowtable; 18 struct nft_flowtable *flowtable;
@@ -50,15 +49,20 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
50 return 0; 49 return 0;
51} 50}
52 51
53static bool nft_flow_offload_skip(struct sk_buff *skb) 52static bool nft_flow_offload_skip(struct sk_buff *skb, int family)
54{ 53{
55 struct ip_options *opt = &(IPCB(skb)->opt);
56
57 if (unlikely(opt->optlen))
58 return true;
59 if (skb_sec_path(skb)) 54 if (skb_sec_path(skb))
60 return true; 55 return true;
61 56
57 if (family == NFPROTO_IPV4) {
58 const struct ip_options *opt;
59
60 opt = &(IPCB(skb)->opt);
61
62 if (unlikely(opt->optlen))
63 return true;
64 }
65
62 return false; 66 return false;
63} 67}
64 68
@@ -68,15 +72,15 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
68{ 72{
69 struct nft_flow_offload *priv = nft_expr_priv(expr); 73 struct nft_flow_offload *priv = nft_expr_priv(expr);
70 struct nf_flowtable *flowtable = &priv->flowtable->data; 74 struct nf_flowtable *flowtable = &priv->flowtable->data;
71 const struct nf_conn_help *help;
72 enum ip_conntrack_info ctinfo; 75 enum ip_conntrack_info ctinfo;
73 struct nf_flow_route route; 76 struct nf_flow_route route;
74 struct flow_offload *flow; 77 struct flow_offload *flow;
75 enum ip_conntrack_dir dir; 78 enum ip_conntrack_dir dir;
79 bool is_tcp = false;
76 struct nf_conn *ct; 80 struct nf_conn *ct;
77 int ret; 81 int ret;
78 82
79 if (nft_flow_offload_skip(pkt->skb)) 83 if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt)))
80 goto out; 84 goto out;
81 85
82 ct = nf_ct_get(pkt->skb, &ctinfo); 86 ct = nf_ct_get(pkt->skb, &ctinfo);
@@ -85,14 +89,16 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
85 89
86 switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { 90 switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
87 case IPPROTO_TCP: 91 case IPPROTO_TCP:
92 is_tcp = true;
93 break;
88 case IPPROTO_UDP: 94 case IPPROTO_UDP:
89 break; 95 break;
90 default: 96 default:
91 goto out; 97 goto out;
92 } 98 }
93 99
94 help = nfct_help(ct); 100 if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
95 if (help) 101 ct->status & IPS_SEQ_ADJUST)
96 goto out; 102 goto out;
97 103
98 if (!nf_ct_is_confirmed(ct)) 104 if (!nf_ct_is_confirmed(ct))
@@ -109,6 +115,11 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
109 if (!flow) 115 if (!flow)
110 goto err_flow_alloc; 116 goto err_flow_alloc;
111 117
118 if (is_tcp) {
119 ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
120 ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
121 }
122
112 ret = flow_offload_add(flowtable, flow); 123 ret = flow_offload_add(flowtable, flow);
113 if (ret < 0) 124 if (ret < 0)
114 goto err_flow_add; 125 goto err_flow_add;
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 3e6d1bcc2894..4144984ebee5 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -2,6 +2,6 @@
2# Makefile for netfilter selftests 2# Makefile for netfilter selftests
3 3
4TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ 4TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
5 conntrack_icmp_related.sh 5 conntrack_icmp_related.sh nft_flowtable.sh
6 6
7include ../lib.mk 7include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
new file mode 100755
index 000000000000..fe52488a6f72
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -0,0 +1,324 @@
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# This tests basic flowtable functionality.
5# Creates following topology:
6#
7# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
8# Router1 is the one doing flow offloading, Router2 has no special
9# purpose other than having a link with a smaller MTU than either Originator
10# or Responder, i.e. TCPMSS announced values are too large and will still
11# result in fragmentation and/or PMTU discovery.
12
13# Kselftest framework requirement - SKIP code is 4.
14ksft_skip=4
15ret=0
16
17ns1in=""
18ns2in=""
19ns1out=""
20ns2out=""
21
22log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
23
24nft --version > /dev/null 2>&1
25if [ $? -ne 0 ];then
26 echo "SKIP: Could not run test without nft tool"
27 exit $ksft_skip
28fi
29
30ip -Version > /dev/null 2>&1
31if [ $? -ne 0 ];then
32 echo "SKIP: Could not run test without ip tool"
33 exit $ksft_skip
34fi
35
36which nc > /dev/null 2>&1
37if [ $? -ne 0 ];then
38 echo "SKIP: Could not run test without nc (netcat)"
39 exit $ksft_skip
40fi
41
42ip netns add nsr1
43if [ $? -ne 0 ];then
44 echo "SKIP: Could not create net namespace"
45 exit $ksft_skip
46fi
47
48ip netns add ns1
49ip netns add ns2
50
51ip netns add nsr2
52
53cleanup() {
54 for i in 1 2; do
55 ip netns del ns$i
56 ip netns del nsr$i
57 done
58
59 rm -f "$ns1in" "$ns1out"
60 rm -f "$ns2in" "$ns2out"
61
62 [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
63}
64
65trap cleanup EXIT
66
67sysctl -q net.netfilter.nf_log_all_netns=1
68
69ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
70ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
71
72ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
73
74for dev in lo veth0 veth1; do
75 for i in 1 2; do
76 ip -net nsr$i link set $dev up
77 done
78done
79
80ip -net nsr1 addr add 10.0.1.1/24 dev veth0
81ip -net nsr1 addr add dead:1::1/64 dev veth0
82
83ip -net nsr2 addr add 10.0.2.1/24 dev veth1
84ip -net nsr2 addr add dead:2::1/64 dev veth1
85
86# set different MTUs so we need to push packets coming from ns1 (large MTU)
87# to ns2 (smaller MTU) to the stack, either to perform fragmentation (ip_no_pmtu_disc=1),
88# or to do PMTU discovery (send ICMP error back to originator).
89# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
90# is NOT the lowest link mtu.
91
92ip -net nsr1 link set veth0 mtu 9000
93ip -net ns1 link set eth0 mtu 9000
94
95ip -net nsr2 link set veth1 mtu 2000
96ip -net ns2 link set eth0 mtu 2000
97
98# transfer-net between nsr1 and nsr2.
99# these addresses are not used for connections.
100ip -net nsr1 addr add 192.168.10.1/24 dev veth1
101ip -net nsr1 addr add fee1:2::1/64 dev veth1
102
103ip -net nsr2 addr add 192.168.10.2/24 dev veth0
104ip -net nsr2 addr add fee1:2::2/64 dev veth0
105
106for i in 1 2; do
107 ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
108 ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
109
110 ip -net ns$i link set lo up
111 ip -net ns$i link set eth0 up
112 ip -net ns$i addr add 10.0.$i.99/24 dev eth0
113 ip -net ns$i route add default via 10.0.$i.1
114 ip -net ns$i addr add dead:$i::99/64 dev eth0
115 ip -net ns$i route add default via dead:$i::1
116 ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
117
118 # don't set ip DF bit for first two tests
119 ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
120done
121
122ip -net nsr1 route add default via 192.168.10.2
123ip -net nsr2 route add default via 192.168.10.1
124
125ip netns exec nsr1 nft -f - <<EOF
126table inet filter {
127 flowtable f1 {
128 hook ingress priority 0
129 devices = { veth0, veth1 }
130 }
131
132 chain forward {
133 type filter hook forward priority 0; policy drop;
134
135 # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
136 meta oif "veth1" tcp dport 12345 flow offload @f1 counter
137
138 # use packet size to trigger 'should be offloaded by now'.
139 # otherwise, if 'flow offload' expression never offloads, the
140 # test will pass.
141 tcp dport 12345 meta length gt 200 ct mark set 1 counter
142
143 # this turns off flow offloading internally, so expect packets again
144 tcp flags fin,rst ct mark set 0 accept
145
146 # this allows large packets from responder, we need this as long
147 # as PMTUd is off.
148 # This rule is deleted for the last test, when we expect PMTUd
149 # to kick in and ensure all packets meet mtu requirements.
150 meta length gt 1500 accept comment something-to-grep-for
151
152 # next line blocks connection w.o. working offload.
153 # we only do this for reverse dir, because we expect packets to
154 # enter slow path due to MTU mismatch of veth0 and veth1.
155 tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
156
157 ct state established,related accept
158
159 # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
160 meta length lt 200 oif "veth1" tcp dport 12345 counter accept
161
162 meta nfproto ipv4 meta l4proto icmp accept
163 meta nfproto ipv6 meta l4proto icmpv6 accept
164 }
165}
166EOF
167
168if [ $? -ne 0 ]; then
169 echo "SKIP: Could not load nft ruleset"
170 exit $ksft_skip
171fi
172
173# test basic connectivity
174ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
175if [ $? -ne 0 ];then
176 echo "ERROR: ns1 cannot reach ns2" 1>&2
177 bash
178 exit 1
179fi
180
181ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
182if [ $? -ne 0 ];then
183 echo "ERROR: ns2 cannot reach ns1" 1>&2
184 exit 1
185fi
186
187if [ $ret -eq 0 ];then
188 echo "PASS: netns routing/connectivity: ns1 can reach ns2"
189fi
190
191ns1in=$(mktemp)
192ns1out=$(mktemp)
193ns2in=$(mktemp)
194ns2out=$(mktemp)
195
196make_file()
197{
198 name=$1
199 who=$2
200
201 SIZE=$((RANDOM % (1024 * 8)))
202 TSIZE=$((SIZE * 1024))
203
204 dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
205
206 SIZE=$((RANDOM % 1024))
207 SIZE=$((SIZE + 128))
208 TSIZE=$((TSIZE + SIZE))
209 dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
210}
211
212check_transfer()
213{
214 in=$1
215 out=$2
216 what=$3
217
218 cmp "$in" "$out" > /dev/null 2>&1
219 if [ $? -ne 0 ] ;then
220 echo "FAIL: file mismatch for $what" 1>&2
221 ls -l "$in"
222 ls -l "$out"
223 return 1
224 fi
225
226 return 0
227}
228
229test_tcp_forwarding()
230{
231 local nsa=$1
232 local nsb=$2
233 local lret=0
234
235 ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
236 lpid=$!
237
238 sleep 1
239 ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
240 cpid=$!
241
242 sleep 3
243
244 kill $lpid
245 kill $cpid
246 wait
247
248 check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
249 if [ $? -ne 0 ];then
250 lret=1
251 fi
252
253 check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
254 if [ $? -ne 0 ];then
255 lret=1
256 fi
257
258 return $lret
259}
260
261make_file "$ns1in" "ns1"
262make_file "$ns2in" "ns2"
263
264# First test:
265# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
266test_tcp_forwarding ns1 ns2
267if [ $? -eq 0 ] ;then
268 echo "PASS: flow offloaded for ns1/ns2"
269else
270 echo "FAIL: flow offload for ns1/ns2:" 1>&2
271 ip netns exec nsr1 nft list ruleset
272 ret=1
273fi
274
275# delete default route, i.e. ns2 won't be able to reach ns1 and
276# will depend on ns1 being masqueraded in nsr1.
277# expect ns1 to appear with the nsr1 address.
278ip -net ns2 route del default via 10.0.2.1
279ip -net ns2 route del default via dead:2::1
280ip -net ns2 route add 192.168.10.1 via 10.0.2.1
281
282# Second test:
283# Same, but with NAT enabled.
284ip netns exec nsr1 nft -f - <<EOF
285table ip nat {
286 chain postrouting {
287 type nat hook postrouting priority 0; policy accept;
288 meta oifname "veth1" masquerade
289 }
290}
291EOF
292
293test_tcp_forwarding ns1 ns2
294
295if [ $? -eq 0 ] ;then
296 echo "PASS: flow offloaded for ns1/ns2 with NAT"
297else
298 echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
299 ip netns exec nsr1 nft list ruleset
300 ret=1
301fi
302
303# Third test:
304# Same as second test, but with PMTU discovery enabled.
305handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
306
307ip netns exec nsr1 nft delete rule inet filter forward $handle
308if [ $? -ne 0 ] ;then
309 echo "FAIL: Could not delete large-packet accept rule"
310 exit 1
311fi
312
313ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
314ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
315
316test_tcp_forwarding ns1 ns2
317if [ $? -eq 0 ] ;then
318 echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
319else
320 echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
321 ip netns exec nsr1 nft list ruleset
322fi
323
324exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 14fcf3104c77..1be55e705780 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -36,7 +36,11 @@ trap cleanup EXIT
36ip netns add ns1 36ip netns add ns1
37ip netns add ns2 37ip netns add ns2
38 38
39ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 39ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1
40if [ $? -ne 0 ];then
41 echo "SKIP: No virtual ethernet pair device support in kernel"
42 exit $ksft_skip
43fi
40ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 44ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
41 45
42ip -net ns0 link set lo up 46ip -net ns0 link set lo up