aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
author David S. Miller <davem@davemloft.net> 2014-05-22 12:06:23 -0400
committer David S. Miller <davem@davemloft.net> 2014-05-22 12:06:23 -0400
commit 8af750d739620a0028dc767b289b0ed1d61fb38b (patch)
tree b3917c2333e1e9e54106e45ab8bbe7c13daf70d5 /net/netfilter
parent 758bd61aa987e82765bd432f37bd81bd197c4b1a (diff)
parent c7c32e72cbe23cea97c5d87ffcf6e23cc1ec1a65 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables
Pablo Neira Ayuso says: ==================== Netfilter/nftables updates for net-next The following patchset contains Netfilter/nftables updates for net-next; the most relevant ones are: 1) Add set element update notification via netlink, from Arturo Borrero. 2) Put all object updates in one single message batch that is sent to kernel-space. Before this patch only rules were included in the batch. This series also introduces the generic transaction infrastructure so updates to all objects (tables, chains, rules and sets) are applied in an all-or-nothing fashion; this series is from me. 3) Defer release of objects via call_rcu to reduce the time required to commit changes. The assumption is that all objects are destroyed in reverse order to ensure that dependencies between them are fulfilled (i.e. rules and sets are destroyed first, then chains, and finally tables). 4) Allow matching by bridge port name, from Tomasz Bursztyka. This series includes two patches to prepare this new feature. 5) Implement the proper set selection based on the characteristics of the data. The new infrastructure also allows you to specify your preferences in terms of memory and computational complexity so the underlying set type is also selected according to your needs, from Patrick McHardy. 6) Several cleanup patches for nft expressions, including one minor possible compilation breakage due to missing mark support, also from Patrick. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/nf_tables_api.c1236
-rw-r--r--net/netfilter/nft_ct.c96
-rw-r--r--net/netfilter/nft_hash.c54
-rw-r--r--net/netfilter/nft_lookup.c10
-rw-r--r--net/netfilter/nft_meta.c103
-rw-r--r--net/netfilter/nft_rbtree.c21
6 files changed, 1061 insertions, 459 deletions
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3fd159db9f06..047884776586 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -88,6 +88,45 @@ nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
88 return ERR_PTR(-EAFNOSUPPORT); 88 return ERR_PTR(-EAFNOSUPPORT);
89} 89}
90 90
91static void nft_ctx_init(struct nft_ctx *ctx,
92 const struct sk_buff *skb,
93 const struct nlmsghdr *nlh,
94 struct nft_af_info *afi,
95 struct nft_table *table,
96 struct nft_chain *chain,
97 const struct nlattr * const *nla)
98{
99 ctx->net = sock_net(skb->sk);
100 ctx->afi = afi;
101 ctx->table = table;
102 ctx->chain = chain;
103 ctx->nla = nla;
104 ctx->portid = NETLINK_CB(skb).portid;
105 ctx->report = nlmsg_report(nlh);
106 ctx->seq = nlh->nlmsg_seq;
107}
108
109static struct nft_trans *nft_trans_alloc(struct nft_ctx *ctx, int msg_type,
110 u32 size)
111{
112 struct nft_trans *trans;
113
114 trans = kzalloc(sizeof(struct nft_trans) + size, GFP_KERNEL);
115 if (trans == NULL)
116 return NULL;
117
118 trans->msg_type = msg_type;
119 trans->ctx = *ctx;
120
121 return trans;
122}
123
124static void nft_trans_destroy(struct nft_trans *trans)
125{
126 list_del(&trans->list);
127 kfree(trans);
128}
129
91/* 130/*
92 * Tables 131 * Tables
93 */ 132 */
@@ -197,20 +236,13 @@ nla_put_failure:
197 return -1; 236 return -1;
198} 237}
199 238
200static int nf_tables_table_notify(const struct sk_buff *oskb, 239static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
201 const struct nlmsghdr *nlh,
202 const struct nft_table *table,
203 int event, int family)
204{ 240{
205 struct sk_buff *skb; 241 struct sk_buff *skb;
206 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
207 u32 seq = nlh ? nlh->nlmsg_seq : 0;
208 struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
209 bool report;
210 int err; 242 int err;
211 243
212 report = nlh ? nlmsg_report(nlh) : false; 244 if (!ctx->report &&
213 if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) 245 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
214 return 0; 246 return 0;
215 247
216 err = -ENOBUFS; 248 err = -ENOBUFS;
@@ -218,18 +250,20 @@ static int nf_tables_table_notify(const struct sk_buff *oskb,
218 if (skb == NULL) 250 if (skb == NULL)
219 goto err; 251 goto err;
220 252
221 err = nf_tables_fill_table_info(skb, portid, seq, event, 0, 253 err = nf_tables_fill_table_info(skb, ctx->portid, ctx->seq, event, 0,
222 family, table); 254 ctx->afi->family, ctx->table);
223 if (err < 0) { 255 if (err < 0) {
224 kfree_skb(skb); 256 kfree_skb(skb);
225 goto err; 257 goto err;
226 } 258 }
227 259
228 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, 260 err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
229 GFP_KERNEL); 261 ctx->report, GFP_KERNEL);
230err: 262err:
231 if (err < 0) 263 if (err < 0) {
232 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); 264 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
265 err);
266 }
233 return err; 267 return err;
234} 268}
235 269
@@ -269,6 +303,9 @@ done:
269 return skb->len; 303 return skb->len;
270} 304}
271 305
306/* Internal table flags */
307#define NFT_TABLE_INACTIVE (1 << 15)
308
272static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb, 309static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
273 const struct nlmsghdr *nlh, 310 const struct nlmsghdr *nlh,
274 const struct nlattr * const nla[]) 311 const struct nlattr * const nla[])
@@ -295,6 +332,8 @@ static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
295 table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); 332 table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
296 if (IS_ERR(table)) 333 if (IS_ERR(table))
297 return PTR_ERR(table); 334 return PTR_ERR(table);
335 if (table->flags & NFT_TABLE_INACTIVE)
336 return -ENOENT;
298 337
299 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 338 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
300 if (!skb2) 339 if (!skb2)
@@ -343,7 +382,7 @@ err:
343 return err; 382 return err;
344} 383}
345 384
346static int nf_tables_table_disable(const struct nft_af_info *afi, 385static void nf_tables_table_disable(const struct nft_af_info *afi,
347 struct nft_table *table) 386 struct nft_table *table)
348{ 387{
349 struct nft_chain *chain; 388 struct nft_chain *chain;
@@ -353,45 +392,63 @@ static int nf_tables_table_disable(const struct nft_af_info *afi,
353 nf_unregister_hooks(nft_base_chain(chain)->ops, 392 nf_unregister_hooks(nft_base_chain(chain)->ops,
354 afi->nops); 393 afi->nops);
355 } 394 }
356
357 return 0;
358} 395}
359 396
360static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb, 397static int nf_tables_updtable(struct nft_ctx *ctx)
361 const struct nlmsghdr *nlh,
362 const struct nlattr * const nla[],
363 struct nft_af_info *afi, struct nft_table *table)
364{ 398{
365 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 399 struct nft_trans *trans;
366 int family = nfmsg->nfgen_family, ret = 0; 400 u32 flags;
401 int ret = 0;
367 402
368 if (nla[NFTA_TABLE_FLAGS]) { 403 if (!ctx->nla[NFTA_TABLE_FLAGS])
369 u32 flags; 404 return 0;
370 405
371 flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS])); 406 flags = ntohl(nla_get_be32(ctx->nla[NFTA_TABLE_FLAGS]));
372 if (flags & ~NFT_TABLE_F_DORMANT) 407 if (flags & ~NFT_TABLE_F_DORMANT)
373 return -EINVAL; 408 return -EINVAL;
409
410 trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
411 sizeof(struct nft_trans_table));
412 if (trans == NULL)
413 return -ENOMEM;
374 414
375 if ((flags & NFT_TABLE_F_DORMANT) && 415 if ((flags & NFT_TABLE_F_DORMANT) &&
376 !(table->flags & NFT_TABLE_F_DORMANT)) { 416 !(ctx->table->flags & NFT_TABLE_F_DORMANT)) {
377 ret = nf_tables_table_disable(afi, table); 417 nft_trans_table_enable(trans) = false;
378 if (ret >= 0) 418 } else if (!(flags & NFT_TABLE_F_DORMANT) &&
379 table->flags |= NFT_TABLE_F_DORMANT; 419 ctx->table->flags & NFT_TABLE_F_DORMANT) {
380 } else if (!(flags & NFT_TABLE_F_DORMANT) && 420 ret = nf_tables_table_enable(ctx->afi, ctx->table);
381 table->flags & NFT_TABLE_F_DORMANT) { 421 if (ret >= 0) {
382 ret = nf_tables_table_enable(afi, table); 422 ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
383 if (ret >= 0) 423 nft_trans_table_enable(trans) = true;
384 table->flags &= ~NFT_TABLE_F_DORMANT;
385 } 424 }
386 if (ret < 0)
387 goto err;
388 } 425 }
426 if (ret < 0)
427 goto err;
389 428
390 nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family); 429 nft_trans_table_update(trans) = true;
430 list_add_tail(&trans->list, &ctx->net->nft.commit_list);
431 return 0;
391err: 432err:
433 nft_trans_destroy(trans);
392 return ret; 434 return ret;
393} 435}
394 436
437static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
438{
439 struct nft_trans *trans;
440
441 trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_table));
442 if (trans == NULL)
443 return -ENOMEM;
444
445 if (msg_type == NFT_MSG_NEWTABLE)
446 ctx->table->flags |= NFT_TABLE_INACTIVE;
447
448 list_add_tail(&trans->list, &ctx->net->nft.commit_list);
449 return 0;
450}
451
395static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb, 452static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
396 const struct nlmsghdr *nlh, 453 const struct nlmsghdr *nlh,
397 const struct nlattr * const nla[]) 454 const struct nlattr * const nla[])
@@ -403,6 +460,8 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
403 struct net *net = sock_net(skb->sk); 460 struct net *net = sock_net(skb->sk);
404 int family = nfmsg->nfgen_family; 461 int family = nfmsg->nfgen_family;
405 u32 flags = 0; 462 u32 flags = 0;
463 struct nft_ctx ctx;
464 int err;
406 465
407 afi = nf_tables_afinfo_lookup(net, family, true); 466 afi = nf_tables_afinfo_lookup(net, family, true);
408 if (IS_ERR(afi)) 467 if (IS_ERR(afi))
@@ -417,11 +476,15 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
417 } 476 }
418 477
419 if (table != NULL) { 478 if (table != NULL) {
479 if (table->flags & NFT_TABLE_INACTIVE)
480 return -ENOENT;
420 if (nlh->nlmsg_flags & NLM_F_EXCL) 481 if (nlh->nlmsg_flags & NLM_F_EXCL)
421 return -EEXIST; 482 return -EEXIST;
422 if (nlh->nlmsg_flags & NLM_F_REPLACE) 483 if (nlh->nlmsg_flags & NLM_F_REPLACE)
423 return -EOPNOTSUPP; 484 return -EOPNOTSUPP;
424 return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table); 485
486 nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
487 return nf_tables_updtable(&ctx);
425 } 488 }
426 489
427 if (nla[NFTA_TABLE_FLAGS]) { 490 if (nla[NFTA_TABLE_FLAGS]) {
@@ -444,8 +507,14 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
444 INIT_LIST_HEAD(&table->sets); 507 INIT_LIST_HEAD(&table->sets);
445 table->flags = flags; 508 table->flags = flags;
446 509
510 nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
511 err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
512 if (err < 0) {
513 kfree(table);
514 module_put(afi->owner);
515 return err;
516 }
447 list_add_tail(&table->list, &afi->tables); 517 list_add_tail(&table->list, &afi->tables);
448 nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
449 return 0; 518 return 0;
450} 519}
451 520
@@ -457,7 +526,8 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
457 struct nft_af_info *afi; 526 struct nft_af_info *afi;
458 struct nft_table *table; 527 struct nft_table *table;
459 struct net *net = sock_net(skb->sk); 528 struct net *net = sock_net(skb->sk);
460 int family = nfmsg->nfgen_family; 529 int family = nfmsg->nfgen_family, err;
530 struct nft_ctx ctx;
461 531
462 afi = nf_tables_afinfo_lookup(net, family, false); 532 afi = nf_tables_afinfo_lookup(net, family, false);
463 if (IS_ERR(afi)) 533 if (IS_ERR(afi))
@@ -466,17 +536,27 @@ static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
466 table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]); 536 table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
467 if (IS_ERR(table)) 537 if (IS_ERR(table))
468 return PTR_ERR(table); 538 return PTR_ERR(table);
539 if (table->flags & NFT_TABLE_INACTIVE)
540 return -ENOENT;
469 541
470 if (!list_empty(&table->chains) || !list_empty(&table->sets)) 542 if (!list_empty(&table->chains) || !list_empty(&table->sets))
471 return -EBUSY; 543 return -EBUSY;
472 544
545 nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
546 err = nft_trans_table_add(&ctx, NFT_MSG_DELTABLE);
547 if (err < 0)
548 return err;
549
473 list_del(&table->list); 550 list_del(&table->list);
474 nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
475 kfree(table);
476 module_put(afi->owner);
477 return 0; 551 return 0;
478} 552}
479 553
554static void nf_tables_table_destroy(struct nft_ctx *ctx)
555{
556 kfree(ctx->table);
557 module_put(ctx->afi->owner);
558}
559
480int nft_register_chain_type(const struct nf_chain_type *ctype) 560int nft_register_chain_type(const struct nf_chain_type *ctype)
481{ 561{
482 int err = 0; 562 int err = 0;
@@ -541,7 +621,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
541 .len = NFT_CHAIN_MAXNAMELEN - 1 }, 621 .len = NFT_CHAIN_MAXNAMELEN - 1 },
542 [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED }, 622 [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
543 [NFTA_CHAIN_POLICY] = { .type = NLA_U32 }, 623 [NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
544 [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING }, 624 [NFTA_CHAIN_TYPE] = { .type = NLA_STRING },
545 [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, 625 [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
546}; 626};
547 627
@@ -637,21 +717,13 @@ nla_put_failure:
637 return -1; 717 return -1;
638} 718}
639 719
640static int nf_tables_chain_notify(const struct sk_buff *oskb, 720static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
641 const struct nlmsghdr *nlh,
642 const struct nft_table *table,
643 const struct nft_chain *chain,
644 int event, int family)
645{ 721{
646 struct sk_buff *skb; 722 struct sk_buff *skb;
647 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
648 struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
649 u32 seq = nlh ? nlh->nlmsg_seq : 0;
650 bool report;
651 int err; 723 int err;
652 724
653 report = nlh ? nlmsg_report(nlh) : false; 725 if (!ctx->report &&
654 if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) 726 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
655 return 0; 727 return 0;
656 728
657 err = -ENOBUFS; 729 err = -ENOBUFS;
@@ -659,18 +731,21 @@ static int nf_tables_chain_notify(const struct sk_buff *oskb,
659 if (skb == NULL) 731 if (skb == NULL)
660 goto err; 732 goto err;
661 733
662 err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family, 734 err = nf_tables_fill_chain_info(skb, ctx->portid, ctx->seq, event, 0,
663 table, chain); 735 ctx->afi->family, ctx->table,
736 ctx->chain);
664 if (err < 0) { 737 if (err < 0) {
665 kfree_skb(skb); 738 kfree_skb(skb);
666 goto err; 739 goto err;
667 } 740 }
668 741
669 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, 742 err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
670 GFP_KERNEL); 743 ctx->report, GFP_KERNEL);
671err: 744err:
672 if (err < 0) 745 if (err < 0) {
673 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); 746 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
747 err);
748 }
674 return err; 749 return err;
675} 750}
676 751
@@ -740,10 +815,14 @@ static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
740 table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); 815 table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
741 if (IS_ERR(table)) 816 if (IS_ERR(table))
742 return PTR_ERR(table); 817 return PTR_ERR(table);
818 if (table->flags & NFT_TABLE_INACTIVE)
819 return -ENOENT;
743 820
744 chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); 821 chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
745 if (IS_ERR(chain)) 822 if (IS_ERR(chain))
746 return PTR_ERR(chain); 823 return PTR_ERR(chain);
824 if (chain->flags & NFT_CHAIN_INACTIVE)
825 return -ENOENT;
747 826
748 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 827 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
749 if (!skb2) 828 if (!skb2)
@@ -767,8 +846,7 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
767 [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, 846 [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
768}; 847};
769 848
770static int 849static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
771nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
772{ 850{
773 struct nlattr *tb[NFTA_COUNTER_MAX+1]; 851 struct nlattr *tb[NFTA_COUNTER_MAX+1];
774 struct nft_stats __percpu *newstats; 852 struct nft_stats __percpu *newstats;
@@ -777,14 +855,14 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
777 855
778 err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy); 856 err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
779 if (err < 0) 857 if (err < 0)
780 return err; 858 return ERR_PTR(err);
781 859
782 if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS]) 860 if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
783 return -EINVAL; 861 return ERR_PTR(-EINVAL);
784 862
785 newstats = alloc_percpu(struct nft_stats); 863 newstats = alloc_percpu(struct nft_stats);
786 if (newstats == NULL) 864 if (newstats == NULL)
787 return -ENOMEM; 865 return ERR_PTR(-ENOMEM);
788 866
789 /* Restore old counters on this cpu, no problem. Per-cpu statistics 867 /* Restore old counters on this cpu, no problem. Per-cpu statistics
790 * are not exposed to userspace. 868 * are not exposed to userspace.
@@ -793,6 +871,12 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
793 stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES])); 871 stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
794 stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS])); 872 stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
795 873
874 return newstats;
875}
876
877static void nft_chain_stats_replace(struct nft_base_chain *chain,
878 struct nft_stats __percpu *newstats)
879{
796 if (chain->stats) { 880 if (chain->stats) {
797 struct nft_stats __percpu *oldstats = 881 struct nft_stats __percpu *oldstats =
798 nft_dereference(chain->stats); 882 nft_dereference(chain->stats);
@@ -802,17 +886,43 @@ nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
802 free_percpu(oldstats); 886 free_percpu(oldstats);
803 } else 887 } else
804 rcu_assign_pointer(chain->stats, newstats); 888 rcu_assign_pointer(chain->stats, newstats);
889}
890
891static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
892{
893 struct nft_trans *trans;
894
895 trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain));
896 if (trans == NULL)
897 return -ENOMEM;
898
899 if (msg_type == NFT_MSG_NEWCHAIN)
900 ctx->chain->flags |= NFT_CHAIN_INACTIVE;
805 901
902 list_add_tail(&trans->list, &ctx->net->nft.commit_list);
806 return 0; 903 return 0;
807} 904}
808 905
906static void nf_tables_chain_destroy(struct nft_chain *chain)
907{
908 BUG_ON(chain->use > 0);
909
910 if (chain->flags & NFT_BASE_CHAIN) {
911 module_put(nft_base_chain(chain)->type->owner);
912 free_percpu(nft_base_chain(chain)->stats);
913 kfree(nft_base_chain(chain));
914 } else {
915 kfree(chain);
916 }
917}
918
809static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, 919static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
810 const struct nlmsghdr *nlh, 920 const struct nlmsghdr *nlh,
811 const struct nlattr * const nla[]) 921 const struct nlattr * const nla[])
812{ 922{
813 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 923 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
814 const struct nlattr * uninitialized_var(name); 924 const struct nlattr * uninitialized_var(name);
815 const struct nft_af_info *afi; 925 struct nft_af_info *afi;
816 struct nft_table *table; 926 struct nft_table *table;
817 struct nft_chain *chain; 927 struct nft_chain *chain;
818 struct nft_base_chain *basechain = NULL; 928 struct nft_base_chain *basechain = NULL;
@@ -822,8 +932,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
822 u8 policy = NF_ACCEPT; 932 u8 policy = NF_ACCEPT;
823 u64 handle = 0; 933 u64 handle = 0;
824 unsigned int i; 934 unsigned int i;
935 struct nft_stats __percpu *stats;
825 int err; 936 int err;
826 bool create; 937 bool create;
938 struct nft_ctx ctx;
827 939
828 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 940 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
829 941
@@ -869,6 +981,11 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
869 } 981 }
870 982
871 if (chain != NULL) { 983 if (chain != NULL) {
984 struct nft_stats *stats = NULL;
985 struct nft_trans *trans;
986
987 if (chain->flags & NFT_CHAIN_INACTIVE)
988 return -ENOENT;
872 if (nlh->nlmsg_flags & NLM_F_EXCL) 989 if (nlh->nlmsg_flags & NLM_F_EXCL)
873 return -EEXIST; 990 return -EEXIST;
874 if (nlh->nlmsg_flags & NLM_F_REPLACE) 991 if (nlh->nlmsg_flags & NLM_F_REPLACE)
@@ -882,19 +999,31 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
882 if (!(chain->flags & NFT_BASE_CHAIN)) 999 if (!(chain->flags & NFT_BASE_CHAIN))
883 return -EOPNOTSUPP; 1000 return -EOPNOTSUPP;
884 1001
885 err = nf_tables_counters(nft_base_chain(chain), 1002 stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
886 nla[NFTA_CHAIN_COUNTERS]); 1003 if (IS_ERR(stats))
887 if (err < 0) 1004 return PTR_ERR(stats);
888 return err;
889 } 1005 }
890 1006
891 if (nla[NFTA_CHAIN_POLICY]) 1007 nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
892 nft_base_chain(chain)->policy = policy; 1008 trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN,
1009 sizeof(struct nft_trans_chain));
1010 if (trans == NULL)
1011 return -ENOMEM;
893 1012
894 if (nla[NFTA_CHAIN_HANDLE] && name) 1013 nft_trans_chain_stats(trans) = stats;
895 nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); 1014 nft_trans_chain_update(trans) = true;
896 1015
897 goto notify; 1016 if (nla[NFTA_CHAIN_POLICY])
1017 nft_trans_chain_policy(trans) = policy;
1018 else
1019 nft_trans_chain_policy(trans) = -1;
1020
1021 if (nla[NFTA_CHAIN_HANDLE] && name) {
1022 nla_strlcpy(nft_trans_chain_name(trans), name,
1023 NFT_CHAIN_MAXNAMELEN);
1024 }
1025 list_add_tail(&trans->list, &net->nft.commit_list);
1026 return 0;
898 } 1027 }
899 1028
900 if (table->use == UINT_MAX) 1029 if (table->use == UINT_MAX)
@@ -939,23 +1068,21 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
939 return -ENOMEM; 1068 return -ENOMEM;
940 1069
941 if (nla[NFTA_CHAIN_COUNTERS]) { 1070 if (nla[NFTA_CHAIN_COUNTERS]) {
942 err = nf_tables_counters(basechain, 1071 stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
943 nla[NFTA_CHAIN_COUNTERS]); 1072 if (IS_ERR(stats)) {
944 if (err < 0) {
945 module_put(type->owner); 1073 module_put(type->owner);
946 kfree(basechain); 1074 kfree(basechain);
947 return err; 1075 return PTR_ERR(stats);
948 } 1076 }
1077 basechain->stats = stats;
949 } else { 1078 } else {
950 struct nft_stats __percpu *newstats; 1079 stats = alloc_percpu(struct nft_stats);
951 1080 if (IS_ERR(stats)) {
952 newstats = alloc_percpu(struct nft_stats);
953 if (newstats == NULL) {
954 module_put(type->owner); 1081 module_put(type->owner);
955 kfree(basechain); 1082 kfree(basechain);
956 return -ENOMEM; 1083 return PTR_ERR(stats);
957 } 1084 }
958 rcu_assign_pointer(basechain->stats, newstats); 1085 rcu_assign_pointer(basechain->stats, stats);
959 } 1086 }
960 1087
961 basechain->type = type; 1088 basechain->type = type;
@@ -992,31 +1119,26 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
992 if (!(table->flags & NFT_TABLE_F_DORMANT) && 1119 if (!(table->flags & NFT_TABLE_F_DORMANT) &&
993 chain->flags & NFT_BASE_CHAIN) { 1120 chain->flags & NFT_BASE_CHAIN) {
994 err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops); 1121 err = nf_register_hooks(nft_base_chain(chain)->ops, afi->nops);
995 if (err < 0) { 1122 if (err < 0)
996 module_put(basechain->type->owner); 1123 goto err1;
997 free_percpu(basechain->stats);
998 kfree(basechain);
999 return err;
1000 }
1001 } 1124 }
1002 list_add_tail(&chain->list, &table->chains);
1003 table->use++;
1004notify:
1005 nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
1006 family);
1007 return 0;
1008}
1009 1125
1010static void nf_tables_chain_destroy(struct nft_chain *chain) 1126 nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1011{ 1127 err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN);
1012 BUG_ON(chain->use > 0); 1128 if (err < 0)
1129 goto err2;
1013 1130
1014 if (chain->flags & NFT_BASE_CHAIN) { 1131 list_add_tail(&chain->list, &table->chains);
1015 module_put(nft_base_chain(chain)->type->owner); 1132 return 0;
1016 free_percpu(nft_base_chain(chain)->stats); 1133err2:
1017 kfree(nft_base_chain(chain)); 1134 if (!(table->flags & NFT_TABLE_F_DORMANT) &&
1018 } else 1135 chain->flags & NFT_BASE_CHAIN) {
1019 kfree(chain); 1136 nf_unregister_hooks(nft_base_chain(chain)->ops,
1137 afi->nops);
1138 }
1139err1:
1140 nf_tables_chain_destroy(chain);
1141 return err;
1020} 1142}
1021 1143
1022static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb, 1144static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
@@ -1024,11 +1146,13 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
1024 const struct nlattr * const nla[]) 1146 const struct nlattr * const nla[])
1025{ 1147{
1026 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 1148 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1027 const struct nft_af_info *afi; 1149 struct nft_af_info *afi;
1028 struct nft_table *table; 1150 struct nft_table *table;
1029 struct nft_chain *chain; 1151 struct nft_chain *chain;
1030 struct net *net = sock_net(skb->sk); 1152 struct net *net = sock_net(skb->sk);
1031 int family = nfmsg->nfgen_family; 1153 int family = nfmsg->nfgen_family;
1154 struct nft_ctx ctx;
1155 int err;
1032 1156
1033 afi = nf_tables_afinfo_lookup(net, family, false); 1157 afi = nf_tables_afinfo_lookup(net, family, false);
1034 if (IS_ERR(afi)) 1158 if (IS_ERR(afi))
@@ -1037,48 +1161,26 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
1037 table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]); 1161 table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
1038 if (IS_ERR(table)) 1162 if (IS_ERR(table))
1039 return PTR_ERR(table); 1163 return PTR_ERR(table);
1164 if (table->flags & NFT_TABLE_INACTIVE)
1165 return -ENOENT;
1040 1166
1041 chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]); 1167 chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
1042 if (IS_ERR(chain)) 1168 if (IS_ERR(chain))
1043 return PTR_ERR(chain); 1169 return PTR_ERR(chain);
1044 1170 if (chain->flags & NFT_CHAIN_INACTIVE)
1171 return -ENOENT;
1045 if (!list_empty(&chain->rules) || chain->use > 0) 1172 if (!list_empty(&chain->rules) || chain->use > 0)
1046 return -EBUSY; 1173 return -EBUSY;
1047 1174
1048 list_del(&chain->list); 1175 nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1049 table->use--; 1176 err = nft_trans_chain_add(&ctx, NFT_MSG_DELCHAIN);
1050 1177 if (err < 0)
1051 if (!(table->flags & NFT_TABLE_F_DORMANT) && 1178 return err;
1052 chain->flags & NFT_BASE_CHAIN)
1053 nf_unregister_hooks(nft_base_chain(chain)->ops, afi->nops);
1054
1055 nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
1056 family);
1057
1058 /* Make sure all rule references are gone before this is released */
1059 synchronize_rcu();
1060 1179
1061 nf_tables_chain_destroy(chain); 1180 list_del(&chain->list);
1062 return 0; 1181 return 0;
1063} 1182}
1064 1183
1065static void nft_ctx_init(struct nft_ctx *ctx,
1066 const struct sk_buff *skb,
1067 const struct nlmsghdr *nlh,
1068 const struct nft_af_info *afi,
1069 const struct nft_table *table,
1070 const struct nft_chain *chain,
1071 const struct nlattr * const *nla)
1072{
1073 ctx->net = sock_net(skb->sk);
1074 ctx->skb = skb;
1075 ctx->nlh = nlh;
1076 ctx->afi = afi;
1077 ctx->table = table;
1078 ctx->chain = chain;
1079 ctx->nla = nla;
1080}
1081
1082/* 1184/*
1083 * Expressions 1185 * Expressions
1084 */ 1186 */
@@ -1093,7 +1195,10 @@ static void nft_ctx_init(struct nft_ctx *ctx,
1093int nft_register_expr(struct nft_expr_type *type) 1195int nft_register_expr(struct nft_expr_type *type)
1094{ 1196{
1095 nfnl_lock(NFNL_SUBSYS_NFTABLES); 1197 nfnl_lock(NFNL_SUBSYS_NFTABLES);
1096 list_add_tail(&type->list, &nf_tables_expressions); 1198 if (type->family == NFPROTO_UNSPEC)
1199 list_add_tail(&type->list, &nf_tables_expressions);
1200 else
1201 list_add(&type->list, &nf_tables_expressions);
1097 nfnl_unlock(NFNL_SUBSYS_NFTABLES); 1202 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1098 return 0; 1203 return 0;
1099} 1204}
@@ -1361,22 +1466,15 @@ nla_put_failure:
1361 return -1; 1466 return -1;
1362} 1467}
1363 1468
1364static int nf_tables_rule_notify(const struct sk_buff *oskb, 1469static int nf_tables_rule_notify(const struct nft_ctx *ctx,
1365 const struct nlmsghdr *nlh,
1366 const struct nft_table *table,
1367 const struct nft_chain *chain,
1368 const struct nft_rule *rule, 1470 const struct nft_rule *rule,
1369 int event, u32 flags, int family) 1471 int event)
1370{ 1472{
1371 struct sk_buff *skb; 1473 struct sk_buff *skb;
1372 u32 portid = NETLINK_CB(oskb).portid;
1373 struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
1374 u32 seq = nlh->nlmsg_seq;
1375 bool report;
1376 int err; 1474 int err;
1377 1475
1378 report = nlmsg_report(nlh); 1476 if (!ctx->report &&
1379 if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES)) 1477 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
1380 return 0; 1478 return 0;
1381 1479
1382 err = -ENOBUFS; 1480 err = -ENOBUFS;
@@ -1384,18 +1482,21 @@ static int nf_tables_rule_notify(const struct sk_buff *oskb,
1384 if (skb == NULL) 1482 if (skb == NULL)
1385 goto err; 1483 goto err;
1386 1484
1387 err = nf_tables_fill_rule_info(skb, portid, seq, event, flags, 1485 err = nf_tables_fill_rule_info(skb, ctx->portid, ctx->seq, event, 0,
1388 family, table, chain, rule); 1486 ctx->afi->family, ctx->table,
1487 ctx->chain, rule);
1389 if (err < 0) { 1488 if (err < 0) {
1390 kfree_skb(skb); 1489 kfree_skb(skb);
1391 goto err; 1490 goto err;
1392 } 1491 }
1393 1492
1394 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, 1493 err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1395 GFP_KERNEL); 1494 ctx->report, GFP_KERNEL);
1396err: 1495err:
1397 if (err < 0) 1496 if (err < 0) {
1398 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err); 1497 nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
1498 err);
1499 }
1399 return err; 1500 return err;
1400} 1501}
1401 1502
@@ -1511,10 +1612,14 @@ static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
1511 table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); 1612 table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
1512 if (IS_ERR(table)) 1613 if (IS_ERR(table))
1513 return PTR_ERR(table); 1614 return PTR_ERR(table);
1615 if (table->flags & NFT_TABLE_INACTIVE)
1616 return -ENOENT;
1514 1617
1515 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); 1618 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
1516 if (IS_ERR(chain)) 1619 if (IS_ERR(chain))
1517 return PTR_ERR(chain); 1620 return PTR_ERR(chain);
1621 if (chain->flags & NFT_CHAIN_INACTIVE)
1622 return -ENOENT;
1518 1623
1519 rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); 1624 rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
1520 if (IS_ERR(rule)) 1625 if (IS_ERR(rule))
@@ -1554,37 +1659,36 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
1554 kfree(rule); 1659 kfree(rule);
1555} 1660}
1556 1661
1557#define NFT_RULE_MAXEXPRS 128 1662static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
1558 1663 struct nft_rule *rule)
1559static struct nft_expr_info *info;
1560
1561static struct nft_rule_trans *
1562nf_tables_trans_add(struct nft_ctx *ctx, struct nft_rule *rule)
1563{ 1664{
1564 struct nft_rule_trans *rupd; 1665 struct nft_trans *trans;
1565 1666
1566 rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL); 1667 trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_rule));
1567 if (rupd == NULL) 1668 if (trans == NULL)
1568 return NULL; 1669 return NULL;
1569 1670
1570 rupd->ctx = *ctx; 1671 nft_trans_rule(trans) = rule;
1571 rupd->rule = rule; 1672 list_add_tail(&trans->list, &ctx->net->nft.commit_list);
1572 list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
1573 1673
1574 return rupd; 1674 return trans;
1575} 1675}
1576 1676
1677#define NFT_RULE_MAXEXPRS 128
1678
1679static struct nft_expr_info *info;
1680
1577static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, 1681static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1578 const struct nlmsghdr *nlh, 1682 const struct nlmsghdr *nlh,
1579 const struct nlattr * const nla[]) 1683 const struct nlattr * const nla[])
1580{ 1684{
1581 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 1685 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1582 const struct nft_af_info *afi; 1686 struct nft_af_info *afi;
1583 struct net *net = sock_net(skb->sk); 1687 struct net *net = sock_net(skb->sk);
1584 struct nft_table *table; 1688 struct nft_table *table;
1585 struct nft_chain *chain; 1689 struct nft_chain *chain;
1586 struct nft_rule *rule, *old_rule = NULL; 1690 struct nft_rule *rule, *old_rule = NULL;
1587 struct nft_rule_trans *repl = NULL; 1691 struct nft_trans *trans = NULL;
1588 struct nft_expr *expr; 1692 struct nft_expr *expr;
1589 struct nft_ctx ctx; 1693 struct nft_ctx ctx;
1590 struct nlattr *tmp; 1694 struct nlattr *tmp;
@@ -1682,8 +1786,9 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1682 1786
1683 if (nlh->nlmsg_flags & NLM_F_REPLACE) { 1787 if (nlh->nlmsg_flags & NLM_F_REPLACE) {
1684 if (nft_rule_is_active_next(net, old_rule)) { 1788 if (nft_rule_is_active_next(net, old_rule)) {
1685 repl = nf_tables_trans_add(&ctx, old_rule); 1789 trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE,
1686 if (repl == NULL) { 1790 old_rule);
1791 if (trans == NULL) {
1687 err = -ENOMEM; 1792 err = -ENOMEM;
1688 goto err2; 1793 goto err2;
1689 } 1794 }
@@ -1705,7 +1810,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1705 list_add_rcu(&rule->list, &chain->rules); 1810 list_add_rcu(&rule->list, &chain->rules);
1706 } 1811 }
1707 1812
1708 if (nf_tables_trans_add(&ctx, rule) == NULL) { 1813 if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
1709 err = -ENOMEM; 1814 err = -ENOMEM;
1710 goto err3; 1815 goto err3;
1711 } 1816 }
@@ -1713,11 +1818,10 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1713 1818
1714err3: 1819err3:
1715 list_del_rcu(&rule->list); 1820 list_del_rcu(&rule->list);
1716 if (repl) { 1821 if (trans) {
1717 list_del_rcu(&repl->rule->list); 1822 list_del_rcu(&nft_trans_rule(trans)->list);
1718 list_del(&repl->list); 1823 nft_rule_clear(net, nft_trans_rule(trans));
1719 nft_rule_clear(net, repl->rule); 1824 nft_trans_destroy(trans);
1720 kfree(repl);
1721 } 1825 }
1722err2: 1826err2:
1723 nf_tables_rule_destroy(&ctx, rule); 1827 nf_tables_rule_destroy(&ctx, rule);
@@ -1734,7 +1838,7 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
1734{ 1838{
1735 /* You cannot delete the same rule twice */ 1839 /* You cannot delete the same rule twice */
1736 if (nft_rule_is_active_next(ctx->net, rule)) { 1840 if (nft_rule_is_active_next(ctx->net, rule)) {
1737 if (nf_tables_trans_add(ctx, rule) == NULL) 1841 if (nft_trans_rule_add(ctx, NFT_MSG_DELRULE, rule) == NULL)
1738 return -ENOMEM; 1842 return -ENOMEM;
1739 nft_rule_disactivate_next(ctx->net, rule); 1843 nft_rule_disactivate_next(ctx->net, rule);
1740 return 0; 1844 return 0;
@@ -1760,9 +1864,9 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
1760 const struct nlattr * const nla[]) 1864 const struct nlattr * const nla[])
1761{ 1865{
1762 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 1866 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1763 const struct nft_af_info *afi; 1867 struct nft_af_info *afi;
1764 struct net *net = sock_net(skb->sk); 1868 struct net *net = sock_net(skb->sk);
1765 const struct nft_table *table; 1869 struct nft_table *table;
1766 struct nft_chain *chain = NULL; 1870 struct nft_chain *chain = NULL;
1767 struct nft_rule *rule; 1871 struct nft_rule *rule;
1768 int family = nfmsg->nfgen_family, err = 0; 1872 int family = nfmsg->nfgen_family, err = 0;
@@ -1775,6 +1879,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
1775 table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]); 1879 table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
1776 if (IS_ERR(table)) 1880 if (IS_ERR(table))
1777 return PTR_ERR(table); 1881 return PTR_ERR(table);
1882 if (table->flags & NFT_TABLE_INACTIVE)
1883 return -ENOENT;
1778 1884
1779 if (nla[NFTA_RULE_CHAIN]) { 1885 if (nla[NFTA_RULE_CHAIN]) {
1780 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); 1886 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
@@ -1807,88 +1913,6 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
1807 return err; 1913 return err;
1808} 1914}
1809 1915
1810static int nf_tables_commit(struct sk_buff *skb)
1811{
1812 struct net *net = sock_net(skb->sk);
1813 struct nft_rule_trans *rupd, *tmp;
1814
1815 /* Bump generation counter, invalidate any dump in progress */
1816 net->nft.genctr++;
1817
1818 /* A new generation has just started */
1819 net->nft.gencursor = gencursor_next(net);
1820
1821 /* Make sure all packets have left the previous generation before
1822 * purging old rules.
1823 */
1824 synchronize_rcu();
1825
1826 list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
1827 /* This rule was inactive in the past and just became active.
1828 * Clear the next bit of the genmask since its meaning has
1829 * changed, now it is the future.
1830 */
1831 if (nft_rule_is_active(net, rupd->rule)) {
1832 nft_rule_clear(net, rupd->rule);
1833 nf_tables_rule_notify(skb, rupd->ctx.nlh,
1834 rupd->ctx.table, rupd->ctx.chain,
1835 rupd->rule, NFT_MSG_NEWRULE, 0,
1836 rupd->ctx.afi->family);
1837 list_del(&rupd->list);
1838 kfree(rupd);
1839 continue;
1840 }
1841
1842 /* This rule is in the past, get rid of it */
1843 list_del_rcu(&rupd->rule->list);
1844 nf_tables_rule_notify(skb, rupd->ctx.nlh,
1845 rupd->ctx.table, rupd->ctx.chain,
1846 rupd->rule, NFT_MSG_DELRULE, 0,
1847 rupd->ctx.afi->family);
1848 }
1849
1850 /* Make sure we don't see any packet traversing old rules */
1851 synchronize_rcu();
1852
1853 /* Now we can safely release unused old rules */
1854 list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
1855 nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
1856 list_del(&rupd->list);
1857 kfree(rupd);
1858 }
1859
1860 return 0;
1861}
1862
1863static int nf_tables_abort(struct sk_buff *skb)
1864{
1865 struct net *net = sock_net(skb->sk);
1866 struct nft_rule_trans *rupd, *tmp;
1867
1868 list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
1869 if (!nft_rule_is_active_next(net, rupd->rule)) {
1870 nft_rule_clear(net, rupd->rule);
1871 list_del(&rupd->list);
1872 kfree(rupd);
1873 continue;
1874 }
1875
1876 /* This rule is inactive, get rid of it */
1877 list_del_rcu(&rupd->rule->list);
1878 }
1879
1880 /* Make sure we don't see any packet accessing aborted rules */
1881 synchronize_rcu();
1882
1883 list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
1884 nf_tables_rule_destroy(&rupd->ctx, rupd->rule);
1885 list_del(&rupd->list);
1886 kfree(rupd);
1887 }
1888
1889 return 0;
1890}
1891
1892/* 1916/*
1893 * Sets 1917 * Sets
1894 */ 1918 */
@@ -1912,9 +1936,18 @@ void nft_unregister_set(struct nft_set_ops *ops)
1912} 1936}
1913EXPORT_SYMBOL_GPL(nft_unregister_set); 1937EXPORT_SYMBOL_GPL(nft_unregister_set);
1914 1938
1915static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[]) 1939/*
1940 * Select a set implementation based on the data characteristics and the
1941 * given policy. The total memory use might not be known if no size is
1942 * given, in that case the amount of memory per element is used.
1943 */
1944static const struct nft_set_ops *
1945nft_select_set_ops(const struct nlattr * const nla[],
1946 const struct nft_set_desc *desc,
1947 enum nft_set_policies policy)
1916{ 1948{
1917 const struct nft_set_ops *ops; 1949 const struct nft_set_ops *ops, *bops;
1950 struct nft_set_estimate est, best;
1918 u32 features; 1951 u32 features;
1919 1952
1920#ifdef CONFIG_MODULES 1953#ifdef CONFIG_MODULES
@@ -1932,15 +1965,45 @@ static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const
1932 features &= NFT_SET_INTERVAL | NFT_SET_MAP; 1965 features &= NFT_SET_INTERVAL | NFT_SET_MAP;
1933 } 1966 }
1934 1967
1935 // FIXME: implement selection properly 1968 bops = NULL;
1969 best.size = ~0;
1970 best.class = ~0;
1971
1936 list_for_each_entry(ops, &nf_tables_set_ops, list) { 1972 list_for_each_entry(ops, &nf_tables_set_ops, list) {
1937 if ((ops->features & features) != features) 1973 if ((ops->features & features) != features)
1938 continue; 1974 continue;
1975 if (!ops->estimate(desc, features, &est))
1976 continue;
1977
1978 switch (policy) {
1979 case NFT_SET_POL_PERFORMANCE:
1980 if (est.class < best.class)
1981 break;
1982 if (est.class == best.class && est.size < best.size)
1983 break;
1984 continue;
1985 case NFT_SET_POL_MEMORY:
1986 if (est.size < best.size)
1987 break;
1988 if (est.size == best.size && est.class < best.class)
1989 break;
1990 continue;
1991 default:
1992 break;
1993 }
1994
1939 if (!try_module_get(ops->owner)) 1995 if (!try_module_get(ops->owner))
1940 continue; 1996 continue;
1941 return ops; 1997 if (bops != NULL)
1998 module_put(bops->owner);
1999
2000 bops = ops;
2001 best = est;
1942 } 2002 }
1943 2003
2004 if (bops != NULL)
2005 return bops;
2006
1944 return ERR_PTR(-EOPNOTSUPP); 2007 return ERR_PTR(-EOPNOTSUPP);
1945} 2008}
1946 2009
@@ -1953,6 +2016,13 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
1953 [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, 2016 [NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
1954 [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, 2017 [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
1955 [NFTA_SET_DATA_LEN] = { .type = NLA_U32 }, 2018 [NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
2019 [NFTA_SET_POLICY] = { .type = NLA_U32 },
2020 [NFTA_SET_DESC] = { .type = NLA_NESTED },
2021 [NFTA_SET_ID] = { .type = NLA_U32 },
2022};
2023
2024static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
2025 [NFTA_SET_DESC_SIZE] = { .type = NLA_U32 },
1956}; 2026};
1957 2027
1958static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, 2028static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
@@ -1962,8 +2032,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
1962{ 2032{
1963 struct net *net = sock_net(skb->sk); 2033 struct net *net = sock_net(skb->sk);
1964 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 2034 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1965 const struct nft_af_info *afi = NULL; 2035 struct nft_af_info *afi = NULL;
1966 const struct nft_table *table = NULL; 2036 struct nft_table *table = NULL;
1967 2037
1968 if (nfmsg->nfgen_family != NFPROTO_UNSPEC) { 2038 if (nfmsg->nfgen_family != NFPROTO_UNSPEC) {
1969 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); 2039 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -1978,6 +2048,8 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
1978 table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]); 2048 table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
1979 if (IS_ERR(table)) 2049 if (IS_ERR(table))
1980 return PTR_ERR(table); 2050 return PTR_ERR(table);
2051 if (table->flags & NFT_TABLE_INACTIVE)
2052 return -ENOENT;
1981 } 2053 }
1982 2054
1983 nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); 2055 nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
@@ -1999,13 +2071,27 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
1999 return ERR_PTR(-ENOENT); 2071 return ERR_PTR(-ENOENT);
2000} 2072}
2001 2073
2074struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
2075 const struct nlattr *nla)
2076{
2077 struct nft_trans *trans;
2078 u32 id = ntohl(nla_get_be32(nla));
2079
2080 list_for_each_entry(trans, &net->nft.commit_list, list) {
2081 if (trans->msg_type == NFT_MSG_NEWSET &&
2082 id == nft_trans_set_id(trans))
2083 return nft_trans_set(trans);
2084 }
2085 return ERR_PTR(-ENOENT);
2086}
2087
2002static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, 2088static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
2003 const char *name) 2089 const char *name)
2004{ 2090{
2005 const struct nft_set *i; 2091 const struct nft_set *i;
2006 const char *p; 2092 const char *p;
2007 unsigned long *inuse; 2093 unsigned long *inuse;
2008 unsigned int n = 0; 2094 unsigned int n = 0, min = 0;
2009 2095
2010 p = strnchr(name, IFNAMSIZ, '%'); 2096 p = strnchr(name, IFNAMSIZ, '%');
2011 if (p != NULL) { 2097 if (p != NULL) {
@@ -2015,23 +2101,28 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
2015 inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL); 2101 inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
2016 if (inuse == NULL) 2102 if (inuse == NULL)
2017 return -ENOMEM; 2103 return -ENOMEM;
2018 2104cont:
2019 list_for_each_entry(i, &ctx->table->sets, list) { 2105 list_for_each_entry(i, &ctx->table->sets, list) {
2020 int tmp; 2106 int tmp;
2021 2107
2022 if (!sscanf(i->name, name, &tmp)) 2108 if (!sscanf(i->name, name, &tmp))
2023 continue; 2109 continue;
2024 if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE) 2110 if (tmp < min || tmp >= min + BITS_PER_BYTE * PAGE_SIZE)
2025 continue; 2111 continue;
2026 2112
2027 set_bit(tmp, inuse); 2113 set_bit(tmp - min, inuse);
2028 } 2114 }
2029 2115
2030 n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE); 2116 n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
2117 if (n >= BITS_PER_BYTE * PAGE_SIZE) {
2118 min += BITS_PER_BYTE * PAGE_SIZE;
2119 memset(inuse, 0, PAGE_SIZE);
2120 goto cont;
2121 }
2031 free_page((unsigned long)inuse); 2122 free_page((unsigned long)inuse);
2032 } 2123 }
2033 2124
2034 snprintf(set->name, sizeof(set->name), name, n); 2125 snprintf(set->name, sizeof(set->name), name, min + n);
2035 list_for_each_entry(i, &ctx->table->sets, list) { 2126 list_for_each_entry(i, &ctx->table->sets, list) {
2036 if (!strcmp(set->name, i->name)) 2127 if (!strcmp(set->name, i->name))
2037 return -ENFILE; 2128 return -ENFILE;
@@ -2044,8 +2135,9 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
2044{ 2135{
2045 struct nfgenmsg *nfmsg; 2136 struct nfgenmsg *nfmsg;
2046 struct nlmsghdr *nlh; 2137 struct nlmsghdr *nlh;
2047 u32 portid = NETLINK_CB(ctx->skb).portid; 2138 struct nlattr *desc;
2048 u32 seq = ctx->nlh->nlmsg_seq; 2139 u32 portid = ctx->portid;
2140 u32 seq = ctx->seq;
2049 2141
2050 event |= NFNL_SUBSYS_NFTABLES << 8; 2142 event |= NFNL_SUBSYS_NFTABLES << 8;
2051 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 2143 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
@@ -2077,6 +2169,14 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
2077 goto nla_put_failure; 2169 goto nla_put_failure;
2078 } 2170 }
2079 2171
2172 desc = nla_nest_start(skb, NFTA_SET_DESC);
2173 if (desc == NULL)
2174 goto nla_put_failure;
2175 if (set->size &&
2176 nla_put_be32(skb, NFTA_SET_DESC_SIZE, htonl(set->size)))
2177 goto nla_put_failure;
2178 nla_nest_end(skb, desc);
2179
2080 return nlmsg_end(skb, nlh); 2180 return nlmsg_end(skb, nlh);
2081 2181
2082nla_put_failure: 2182nla_put_failure:
@@ -2089,12 +2189,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
2089 int event) 2189 int event)
2090{ 2190{
2091 struct sk_buff *skb; 2191 struct sk_buff *skb;
2092 u32 portid = NETLINK_CB(ctx->skb).portid; 2192 u32 portid = ctx->portid;
2093 bool report;
2094 int err; 2193 int err;
2095 2194
2096 report = nlmsg_report(ctx->nlh); 2195 if (!ctx->report &&
2097 if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES)) 2196 !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
2098 return 0; 2197 return 0;
2099 2198
2100 err = -ENOBUFS; 2199 err = -ENOBUFS;
@@ -2108,8 +2207,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
2108 goto err; 2207 goto err;
2109 } 2208 }
2110 2209
2111 err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report, 2210 err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
2112 GFP_KERNEL); 2211 ctx->report, GFP_KERNEL);
2113err: 2212err:
2114 if (err < 0) 2213 if (err < 0)
2115 nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err); 2214 nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
@@ -2183,7 +2282,7 @@ static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
2183{ 2282{
2184 const struct nft_set *set; 2283 const struct nft_set *set;
2185 unsigned int idx, s_idx = cb->args[0]; 2284 unsigned int idx, s_idx = cb->args[0];
2186 const struct nft_af_info *afi; 2285 struct nft_af_info *afi;
2187 struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2]; 2286 struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
2188 struct net *net = sock_net(skb->sk); 2287 struct net *net = sock_net(skb->sk);
2189 int cur_family = cb->args[3]; 2288 int cur_family = cb->args[3];
@@ -2260,6 +2359,8 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
2260 return ret; 2359 return ret;
2261} 2360}
2262 2361
2362#define NFT_SET_INACTIVE (1 << 15) /* Internal set flag */
2363
2263static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb, 2364static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
2264 const struct nlmsghdr *nlh, 2365 const struct nlmsghdr *nlh,
2265 const struct nlattr * const nla[]) 2366 const struct nlattr * const nla[])
@@ -2289,6 +2390,8 @@ static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
2289 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); 2390 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
2290 if (IS_ERR(set)) 2391 if (IS_ERR(set))
2291 return PTR_ERR(set); 2392 return PTR_ERR(set);
2393 if (set->flags & NFT_SET_INACTIVE)
2394 return -ENOENT;
2292 2395
2293 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2396 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2294 if (skb2 == NULL) 2397 if (skb2 == NULL)
@@ -2305,13 +2408,50 @@ err:
2305 return err; 2408 return err;
2306} 2409}
2307 2410
2411static int nf_tables_set_desc_parse(const struct nft_ctx *ctx,
2412 struct nft_set_desc *desc,
2413 const struct nlattr *nla)
2414{
2415 struct nlattr *da[NFTA_SET_DESC_MAX + 1];
2416 int err;
2417
2418 err = nla_parse_nested(da, NFTA_SET_DESC_MAX, nla, nft_set_desc_policy);
2419 if (err < 0)
2420 return err;
2421
2422 if (da[NFTA_SET_DESC_SIZE] != NULL)
2423 desc->size = ntohl(nla_get_be32(da[NFTA_SET_DESC_SIZE]));
2424
2425 return 0;
2426}
2427
2428static int nft_trans_set_add(struct nft_ctx *ctx, int msg_type,
2429 struct nft_set *set)
2430{
2431 struct nft_trans *trans;
2432
2433 trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set));
2434 if (trans == NULL)
2435 return -ENOMEM;
2436
2437 if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
2438 nft_trans_set_id(trans) =
2439 ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
2440 set->flags |= NFT_SET_INACTIVE;
2441 }
2442 nft_trans_set(trans) = set;
2443 list_add_tail(&trans->list, &ctx->net->nft.commit_list);
2444
2445 return 0;
2446}
2447
2308static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, 2448static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2309 const struct nlmsghdr *nlh, 2449 const struct nlmsghdr *nlh,
2310 const struct nlattr * const nla[]) 2450 const struct nlattr * const nla[])
2311{ 2451{
2312 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 2452 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2313 const struct nft_set_ops *ops; 2453 const struct nft_set_ops *ops;
2314 const struct nft_af_info *afi; 2454 struct nft_af_info *afi;
2315 struct net *net = sock_net(skb->sk); 2455 struct net *net = sock_net(skb->sk);
2316 struct nft_table *table; 2456 struct nft_table *table;
2317 struct nft_set *set; 2457 struct nft_set *set;
@@ -2319,14 +2459,18 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2319 char name[IFNAMSIZ]; 2459 char name[IFNAMSIZ];
2320 unsigned int size; 2460 unsigned int size;
2321 bool create; 2461 bool create;
2322 u32 ktype, klen, dlen, dtype, flags; 2462 u32 ktype, dtype, flags, policy;
2463 struct nft_set_desc desc;
2323 int err; 2464 int err;
2324 2465
2325 if (nla[NFTA_SET_TABLE] == NULL || 2466 if (nla[NFTA_SET_TABLE] == NULL ||
2326 nla[NFTA_SET_NAME] == NULL || 2467 nla[NFTA_SET_NAME] == NULL ||
2327 nla[NFTA_SET_KEY_LEN] == NULL) 2468 nla[NFTA_SET_KEY_LEN] == NULL ||
2469 nla[NFTA_SET_ID] == NULL)
2328 return -EINVAL; 2470 return -EINVAL;
2329 2471
2472 memset(&desc, 0, sizeof(desc));
2473
2330 ktype = NFT_DATA_VALUE; 2474 ktype = NFT_DATA_VALUE;
2331 if (nla[NFTA_SET_KEY_TYPE] != NULL) { 2475 if (nla[NFTA_SET_KEY_TYPE] != NULL) {
2332 ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); 2476 ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
@@ -2334,8 +2478,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2334 return -EINVAL; 2478 return -EINVAL;
2335 } 2479 }
2336 2480
2337 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); 2481 desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
2338 if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data)) 2482 if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
2339 return -EINVAL; 2483 return -EINVAL;
2340 2484
2341 flags = 0; 2485 flags = 0;
@@ -2347,7 +2491,6 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2347 } 2491 }
2348 2492
2349 dtype = 0; 2493 dtype = 0;
2350 dlen = 0;
2351 if (nla[NFTA_SET_DATA_TYPE] != NULL) { 2494 if (nla[NFTA_SET_DATA_TYPE] != NULL) {
2352 if (!(flags & NFT_SET_MAP)) 2495 if (!(flags & NFT_SET_MAP))
2353 return -EINVAL; 2496 return -EINVAL;
@@ -2360,15 +2503,25 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2360 if (dtype != NFT_DATA_VERDICT) { 2503 if (dtype != NFT_DATA_VERDICT) {
2361 if (nla[NFTA_SET_DATA_LEN] == NULL) 2504 if (nla[NFTA_SET_DATA_LEN] == NULL)
2362 return -EINVAL; 2505 return -EINVAL;
2363 dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); 2506 desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
2364 if (dlen == 0 || 2507 if (desc.dlen == 0 ||
2365 dlen > FIELD_SIZEOF(struct nft_data, data)) 2508 desc.dlen > FIELD_SIZEOF(struct nft_data, data))
2366 return -EINVAL; 2509 return -EINVAL;
2367 } else 2510 } else
2368 dlen = sizeof(struct nft_data); 2511 desc.dlen = sizeof(struct nft_data);
2369 } else if (flags & NFT_SET_MAP) 2512 } else if (flags & NFT_SET_MAP)
2370 return -EINVAL; 2513 return -EINVAL;
2371 2514
2515 policy = NFT_SET_POL_PERFORMANCE;
2516 if (nla[NFTA_SET_POLICY] != NULL)
2517 policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
2518
2519 if (nla[NFTA_SET_DESC] != NULL) {
2520 err = nf_tables_set_desc_parse(&ctx, &desc, nla[NFTA_SET_DESC]);
2521 if (err < 0)
2522 return err;
2523 }
2524
2372 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 2525 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
2373 2526
2374 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create); 2527 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
@@ -2399,7 +2552,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2399 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) 2552 if (!(nlh->nlmsg_flags & NLM_F_CREATE))
2400 return -ENOENT; 2553 return -ENOENT;
2401 2554
2402 ops = nft_select_set_ops(nla); 2555 ops = nft_select_set_ops(nla, &desc, policy);
2403 if (IS_ERR(ops)) 2556 if (IS_ERR(ops))
2404 return PTR_ERR(ops); 2557 return PTR_ERR(ops);
2405 2558
@@ -2420,17 +2573,21 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2420 INIT_LIST_HEAD(&set->bindings); 2573 INIT_LIST_HEAD(&set->bindings);
2421 set->ops = ops; 2574 set->ops = ops;
2422 set->ktype = ktype; 2575 set->ktype = ktype;
2423 set->klen = klen; 2576 set->klen = desc.klen;
2424 set->dtype = dtype; 2577 set->dtype = dtype;
2425 set->dlen = dlen; 2578 set->dlen = desc.dlen;
2426 set->flags = flags; 2579 set->flags = flags;
2580 set->size = desc.size;
2427 2581
2428 err = ops->init(set, nla); 2582 err = ops->init(set, &desc, nla);
2583 if (err < 0)
2584 goto err2;
2585
2586 err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
2429 if (err < 0) 2587 if (err < 0)
2430 goto err2; 2588 goto err2;
2431 2589
2432 list_add_tail(&set->list, &table->sets); 2590 list_add_tail(&set->list, &table->sets);
2433 nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
2434 return 0; 2591 return 0;
2435 2592
2436err2: 2593err2:
@@ -2440,16 +2597,20 @@ err1:
2440 return err; 2597 return err;
2441} 2598}
2442 2599
2443static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set) 2600static void nft_set_destroy(struct nft_set *set)
2444{ 2601{
2445 list_del(&set->list);
2446 nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
2447
2448 set->ops->destroy(set); 2602 set->ops->destroy(set);
2449 module_put(set->ops->owner); 2603 module_put(set->ops->owner);
2450 kfree(set); 2604 kfree(set);
2451} 2605}
2452 2606
2607static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
2608{
2609 list_del(&set->list);
2610 nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
2611 nft_set_destroy(set);
2612}
2613
2453static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb, 2614static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
2454 const struct nlmsghdr *nlh, 2615 const struct nlmsghdr *nlh,
2455 const struct nlattr * const nla[]) 2616 const struct nlattr * const nla[])
@@ -2471,10 +2632,16 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
2471 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); 2632 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
2472 if (IS_ERR(set)) 2633 if (IS_ERR(set))
2473 return PTR_ERR(set); 2634 return PTR_ERR(set);
2635 if (set->flags & NFT_SET_INACTIVE)
2636 return -ENOENT;
2474 if (!list_empty(&set->bindings)) 2637 if (!list_empty(&set->bindings))
2475 return -EBUSY; 2638 return -EBUSY;
2476 2639
2477 nf_tables_set_destroy(&ctx, set); 2640 err = nft_trans_set_add(&ctx, NFT_MSG_DELSET, set);
2641 if (err < 0)
2642 return err;
2643
2644 list_del(&set->list);
2478 return 0; 2645 return 0;
2479} 2646}
2480 2647
@@ -2534,7 +2701,8 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
2534{ 2701{
2535 list_del(&binding->list); 2702 list_del(&binding->list);
2536 2703
2537 if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) 2704 if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS &&
2705 !(set->flags & NFT_SET_INACTIVE))
2538 nf_tables_set_destroy(ctx, set); 2706 nf_tables_set_destroy(ctx, set);
2539} 2707}
2540 2708
@@ -2552,16 +2720,18 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
2552 [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING }, 2720 [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
2553 [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING }, 2721 [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
2554 [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED }, 2722 [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
2723 [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 },
2555}; 2724};
2556 2725
2557static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, 2726static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
2558 const struct sk_buff *skb, 2727 const struct sk_buff *skb,
2559 const struct nlmsghdr *nlh, 2728 const struct nlmsghdr *nlh,
2560 const struct nlattr * const nla[]) 2729 const struct nlattr * const nla[],
2730 bool trans)
2561{ 2731{
2562 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 2732 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2563 const struct nft_af_info *afi; 2733 struct nft_af_info *afi;
2564 const struct nft_table *table; 2734 struct nft_table *table;
2565 struct net *net = sock_net(skb->sk); 2735 struct net *net = sock_net(skb->sk);
2566 2736
2567 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false); 2737 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
@@ -2571,6 +2741,8 @@ static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
2571 table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]); 2741 table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
2572 if (IS_ERR(table)) 2742 if (IS_ERR(table))
2573 return PTR_ERR(table); 2743 return PTR_ERR(table);
2744 if (!trans && (table->flags & NFT_TABLE_INACTIVE))
2745 return -ENOENT;
2574 2746
2575 nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla); 2747 nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
2576 return 0; 2748 return 0;
@@ -2644,13 +2816,16 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
2644 if (err < 0) 2816 if (err < 0)
2645 return err; 2817 return err;
2646 2818
2647 err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla); 2819 err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla,
2820 false);
2648 if (err < 0) 2821 if (err < 0)
2649 return err; 2822 return err;
2650 2823
2651 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); 2824 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
2652 if (IS_ERR(set)) 2825 if (IS_ERR(set))
2653 return PTR_ERR(set); 2826 return PTR_ERR(set);
2827 if (set->flags & NFT_SET_INACTIVE)
2828 return -ENOENT;
2654 2829
2655 event = NFT_MSG_NEWSETELEM; 2830 event = NFT_MSG_NEWSETELEM;
2656 event |= NFNL_SUBSYS_NFTABLES << 8; 2831 event |= NFNL_SUBSYS_NFTABLES << 8;
@@ -2707,13 +2882,15 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
2707 struct nft_ctx ctx; 2882 struct nft_ctx ctx;
2708 int err; 2883 int err;
2709 2884
2710 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); 2885 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
2711 if (err < 0) 2886 if (err < 0)
2712 return err; 2887 return err;
2713 2888
2714 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); 2889 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
2715 if (IS_ERR(set)) 2890 if (IS_ERR(set))
2716 return PTR_ERR(set); 2891 return PTR_ERR(set);
2892 if (set->flags & NFT_SET_INACTIVE)
2893 return -ENOENT;
2717 2894
2718 if (nlh->nlmsg_flags & NLM_F_DUMP) { 2895 if (nlh->nlmsg_flags & NLM_F_DUMP) {
2719 struct netlink_dump_control c = { 2896 struct netlink_dump_control c = {
@@ -2724,7 +2901,98 @@ static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
2724 return -EOPNOTSUPP; 2901 return -EOPNOTSUPP;
2725} 2902}
2726 2903
2727static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set, 2904static int nf_tables_fill_setelem_info(struct sk_buff *skb,
2905 const struct nft_ctx *ctx, u32 seq,
2906 u32 portid, int event, u16 flags,
2907 const struct nft_set *set,
2908 const struct nft_set_elem *elem)
2909{
2910 struct nfgenmsg *nfmsg;
2911 struct nlmsghdr *nlh;
2912 struct nlattr *nest;
2913 int err;
2914
2915 event |= NFNL_SUBSYS_NFTABLES << 8;
2916 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
2917 flags);
2918 if (nlh == NULL)
2919 goto nla_put_failure;
2920
2921 nfmsg = nlmsg_data(nlh);
2922 nfmsg->nfgen_family = ctx->afi->family;
2923 nfmsg->version = NFNETLINK_V0;
2924 nfmsg->res_id = 0;
2925
2926 if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
2927 goto nla_put_failure;
2928 if (nla_put_string(skb, NFTA_SET_NAME, set->name))
2929 goto nla_put_failure;
2930
2931 nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
2932 if (nest == NULL)
2933 goto nla_put_failure;
2934
2935 err = nf_tables_fill_setelem(skb, set, elem);
2936 if (err < 0)
2937 goto nla_put_failure;
2938
2939 nla_nest_end(skb, nest);
2940
2941 return nlmsg_end(skb, nlh);
2942
2943nla_put_failure:
2944 nlmsg_trim(skb, nlh);
2945 return -1;
2946}
2947
2948static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
2949 const struct nft_set *set,
2950 const struct nft_set_elem *elem,
2951 int event, u16 flags)
2952{
2953 struct net *net = ctx->net;
2954 u32 portid = ctx->portid;
2955 struct sk_buff *skb;
2956 int err;
2957
2958 if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
2959 return 0;
2960
2961 err = -ENOBUFS;
2962 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
2963 if (skb == NULL)
2964 goto err;
2965
2966 err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
2967 set, elem);
2968 if (err < 0) {
2969 kfree_skb(skb);
2970 goto err;
2971 }
2972
2973 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
2974 GFP_KERNEL);
2975err:
2976 if (err < 0)
2977 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
2978 return err;
2979}
2980
2981static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
2982 int msg_type,
2983 struct nft_set *set)
2984{
2985 struct nft_trans *trans;
2986
2987 trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
2988 if (trans == NULL)
2989 return NULL;
2990
2991 nft_trans_elem_set(trans) = set;
2992 return trans;
2993}
2994
2995static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
2728 const struct nlattr *attr) 2996 const struct nlattr *attr)
2729{ 2997{
2730 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; 2998 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -2732,8 +3000,12 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
2732 struct nft_set_elem elem; 3000 struct nft_set_elem elem;
2733 struct nft_set_binding *binding; 3001 struct nft_set_binding *binding;
2734 enum nft_registers dreg; 3002 enum nft_registers dreg;
3003 struct nft_trans *trans;
2735 int err; 3004 int err;
2736 3005
3006 if (set->size && set->nelems == set->size)
3007 return -ENFILE;
3008
2737 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, 3009 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
2738 nft_set_elem_policy); 3010 nft_set_elem_policy);
2739 if (err < 0) 3011 if (err < 0)
@@ -2786,7 +3058,7 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
2786 struct nft_ctx bind_ctx = { 3058 struct nft_ctx bind_ctx = {
2787 .afi = ctx->afi, 3059 .afi = ctx->afi,
2788 .table = ctx->table, 3060 .table = ctx->table,
2789 .chain = binding->chain, 3061 .chain = (struct nft_chain *)binding->chain,
2790 }; 3062 };
2791 3063
2792 err = nft_validate_data_load(&bind_ctx, dreg, 3064 err = nft_validate_data_load(&bind_ctx, dreg,
@@ -2796,12 +3068,20 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
2796 } 3068 }
2797 } 3069 }
2798 3070
3071 trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
3072 if (trans == NULL)
3073 goto err3;
3074
2799 err = set->ops->insert(set, &elem); 3075 err = set->ops->insert(set, &elem);
2800 if (err < 0) 3076 if (err < 0)
2801 goto err3; 3077 goto err4;
2802 3078
3079 nft_trans_elem(trans) = elem;
3080 list_add(&trans->list, &ctx->net->nft.commit_list);
2803 return 0; 3081 return 0;
2804 3082
3083err4:
3084 kfree(trans);
2805err3: 3085err3:
2806 if (nla[NFTA_SET_ELEM_DATA] != NULL) 3086 if (nla[NFTA_SET_ELEM_DATA] != NULL)
2807 nft_data_uninit(&elem.data, d2.type); 3087 nft_data_uninit(&elem.data, d2.type);
@@ -2815,35 +3095,44 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
2815 const struct nlmsghdr *nlh, 3095 const struct nlmsghdr *nlh,
2816 const struct nlattr * const nla[]) 3096 const struct nlattr * const nla[])
2817{ 3097{
3098 struct net *net = sock_net(skb->sk);
2818 const struct nlattr *attr; 3099 const struct nlattr *attr;
2819 struct nft_set *set; 3100 struct nft_set *set;
2820 struct nft_ctx ctx; 3101 struct nft_ctx ctx;
2821 int rem, err; 3102 int rem, err = 0;
2822 3103
2823 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); 3104 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, true);
2824 if (err < 0) 3105 if (err < 0)
2825 return err; 3106 return err;
2826 3107
2827 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]); 3108 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
2828 if (IS_ERR(set)) 3109 if (IS_ERR(set)) {
2829 return PTR_ERR(set); 3110 if (nla[NFTA_SET_ELEM_LIST_SET_ID]) {
3111 set = nf_tables_set_lookup_byid(net,
3112 nla[NFTA_SET_ELEM_LIST_SET_ID]);
3113 }
3114 if (IS_ERR(set))
3115 return PTR_ERR(set);
3116 }
3117
2830 if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) 3118 if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
2831 return -EBUSY; 3119 return -EBUSY;
2832 3120
2833 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 3121 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
2834 err = nft_add_set_elem(&ctx, set, attr); 3122 err = nft_add_set_elem(&ctx, set, attr);
2835 if (err < 0) 3123 if (err < 0)
2836 return err; 3124 break;
2837 } 3125 }
2838 return 0; 3126 return err;
2839} 3127}
2840 3128
2841static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set, 3129static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
2842 const struct nlattr *attr) 3130 const struct nlattr *attr)
2843{ 3131{
2844 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; 3132 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
2845 struct nft_data_desc desc; 3133 struct nft_data_desc desc;
2846 struct nft_set_elem elem; 3134 struct nft_set_elem elem;
3135 struct nft_trans *trans;
2847 int err; 3136 int err;
2848 3137
2849 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, 3138 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -2867,7 +3156,12 @@ static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
2867 if (err < 0) 3156 if (err < 0)
2868 goto err2; 3157 goto err2;
2869 3158
2870 set->ops->remove(set, &elem); 3159 trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
3160 if (trans == NULL)
3161 goto err2;
3162
3163 nft_trans_elem(trans) = elem;
3164 list_add(&trans->list, &ctx->net->nft.commit_list);
2871 3165
2872 nft_data_uninit(&elem.key, NFT_DATA_VALUE); 3166 nft_data_uninit(&elem.key, NFT_DATA_VALUE);
2873 if (set->flags & NFT_SET_MAP) 3167 if (set->flags & NFT_SET_MAP)
@@ -2886,9 +3180,9 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
2886 const struct nlattr *attr; 3180 const struct nlattr *attr;
2887 struct nft_set *set; 3181 struct nft_set *set;
2888 struct nft_ctx ctx; 3182 struct nft_ctx ctx;
2889 int rem, err; 3183 int rem, err = 0;
2890 3184
2891 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla); 3185 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla, false);
2892 if (err < 0) 3186 if (err < 0)
2893 return err; 3187 return err;
2894 3188
@@ -2901,14 +3195,14 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
2901 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 3195 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
2902 err = nft_del_setelem(&ctx, set, attr); 3196 err = nft_del_setelem(&ctx, set, attr);
2903 if (err < 0) 3197 if (err < 0)
2904 return err; 3198 break;
2905 } 3199 }
2906 return 0; 3200 return err;
2907} 3201}
2908 3202
2909static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { 3203static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2910 [NFT_MSG_NEWTABLE] = { 3204 [NFT_MSG_NEWTABLE] = {
2911 .call = nf_tables_newtable, 3205 .call_batch = nf_tables_newtable,
2912 .attr_count = NFTA_TABLE_MAX, 3206 .attr_count = NFTA_TABLE_MAX,
2913 .policy = nft_table_policy, 3207 .policy = nft_table_policy,
2914 }, 3208 },
@@ -2918,12 +3212,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2918 .policy = nft_table_policy, 3212 .policy = nft_table_policy,
2919 }, 3213 },
2920 [NFT_MSG_DELTABLE] = { 3214 [NFT_MSG_DELTABLE] = {
2921 .call = nf_tables_deltable, 3215 .call_batch = nf_tables_deltable,
2922 .attr_count = NFTA_TABLE_MAX, 3216 .attr_count = NFTA_TABLE_MAX,
2923 .policy = nft_table_policy, 3217 .policy = nft_table_policy,
2924 }, 3218 },
2925 [NFT_MSG_NEWCHAIN] = { 3219 [NFT_MSG_NEWCHAIN] = {
2926 .call = nf_tables_newchain, 3220 .call_batch = nf_tables_newchain,
2927 .attr_count = NFTA_CHAIN_MAX, 3221 .attr_count = NFTA_CHAIN_MAX,
2928 .policy = nft_chain_policy, 3222 .policy = nft_chain_policy,
2929 }, 3223 },
@@ -2933,7 +3227,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2933 .policy = nft_chain_policy, 3227 .policy = nft_chain_policy,
2934 }, 3228 },
2935 [NFT_MSG_DELCHAIN] = { 3229 [NFT_MSG_DELCHAIN] = {
2936 .call = nf_tables_delchain, 3230 .call_batch = nf_tables_delchain,
2937 .attr_count = NFTA_CHAIN_MAX, 3231 .attr_count = NFTA_CHAIN_MAX,
2938 .policy = nft_chain_policy, 3232 .policy = nft_chain_policy,
2939 }, 3233 },
@@ -2953,7 +3247,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2953 .policy = nft_rule_policy, 3247 .policy = nft_rule_policy,
2954 }, 3248 },
2955 [NFT_MSG_NEWSET] = { 3249 [NFT_MSG_NEWSET] = {
2956 .call = nf_tables_newset, 3250 .call_batch = nf_tables_newset,
2957 .attr_count = NFTA_SET_MAX, 3251 .attr_count = NFTA_SET_MAX,
2958 .policy = nft_set_policy, 3252 .policy = nft_set_policy,
2959 }, 3253 },
@@ -2963,12 +3257,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2963 .policy = nft_set_policy, 3257 .policy = nft_set_policy,
2964 }, 3258 },
2965 [NFT_MSG_DELSET] = { 3259 [NFT_MSG_DELSET] = {
2966 .call = nf_tables_delset, 3260 .call_batch = nf_tables_delset,
2967 .attr_count = NFTA_SET_MAX, 3261 .attr_count = NFTA_SET_MAX,
2968 .policy = nft_set_policy, 3262 .policy = nft_set_policy,
2969 }, 3263 },
2970 [NFT_MSG_NEWSETELEM] = { 3264 [NFT_MSG_NEWSETELEM] = {
2971 .call = nf_tables_newsetelem, 3265 .call_batch = nf_tables_newsetelem,
2972 .attr_count = NFTA_SET_ELEM_LIST_MAX, 3266 .attr_count = NFTA_SET_ELEM_LIST_MAX,
2973 .policy = nft_set_elem_list_policy, 3267 .policy = nft_set_elem_list_policy,
2974 }, 3268 },
@@ -2978,12 +3272,270 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2978 .policy = nft_set_elem_list_policy, 3272 .policy = nft_set_elem_list_policy,
2979 }, 3273 },
2980 [NFT_MSG_DELSETELEM] = { 3274 [NFT_MSG_DELSETELEM] = {
2981 .call = nf_tables_delsetelem, 3275 .call_batch = nf_tables_delsetelem,
2982 .attr_count = NFTA_SET_ELEM_LIST_MAX, 3276 .attr_count = NFTA_SET_ELEM_LIST_MAX,
2983 .policy = nft_set_elem_list_policy, 3277 .policy = nft_set_elem_list_policy,
2984 }, 3278 },
2985}; 3279};
2986 3280
3281static void nft_chain_commit_update(struct nft_trans *trans)
3282{
3283 struct nft_base_chain *basechain;
3284
3285 if (nft_trans_chain_name(trans)[0])
3286 strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans));
3287
3288 if (!(trans->ctx.chain->flags & NFT_BASE_CHAIN))
3289 return;
3290
3291 basechain = nft_base_chain(trans->ctx.chain);
3292 nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans));
3293
3294 switch (nft_trans_chain_policy(trans)) {
3295 case NF_DROP:
3296 case NF_ACCEPT:
3297 basechain->policy = nft_trans_chain_policy(trans);
3298 break;
3299 }
3300}
3301
3302/* Schedule objects for release via rcu to make sure no packets are accessing
3303 * removed rules.
3304 */
3305static void nf_tables_commit_release_rcu(struct rcu_head *rt)
3306{
3307 struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
3308
3309 switch (trans->msg_type) {
3310 case NFT_MSG_DELTABLE:
3311 nf_tables_table_destroy(&trans->ctx);
3312 break;
3313 case NFT_MSG_DELCHAIN:
3314 nf_tables_chain_destroy(trans->ctx.chain);
3315 break;
3316 case NFT_MSG_DELRULE:
3317 nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
3318 break;
3319 case NFT_MSG_DELSET:
3320 nft_set_destroy(nft_trans_set(trans));
3321 break;
3322 }
3323 kfree(trans);
3324}
3325
3326static int nf_tables_commit(struct sk_buff *skb)
3327{
3328 struct net *net = sock_net(skb->sk);
3329 struct nft_trans *trans, *next;
3330 struct nft_set *set;
3331
3332 /* Bump generation counter, invalidate any dump in progress */
3333 net->nft.genctr++;
3334
3335 /* A new generation has just started */
3336 net->nft.gencursor = gencursor_next(net);
3337
3338 /* Make sure all packets have left the previous generation before
3339 * purging old rules.
3340 */
3341 synchronize_rcu();
3342
3343 list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
3344 switch (trans->msg_type) {
3345 case NFT_MSG_NEWTABLE:
3346 if (nft_trans_table_update(trans)) {
3347 if (!nft_trans_table_enable(trans)) {
3348 nf_tables_table_disable(trans->ctx.afi,
3349 trans->ctx.table);
3350 trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
3351 }
3352 } else {
3353 trans->ctx.table->flags &= ~NFT_TABLE_INACTIVE;
3354 }
3355 nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE);
3356 nft_trans_destroy(trans);
3357 break;
3358 case NFT_MSG_DELTABLE:
3359 nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
3360 break;
3361 case NFT_MSG_NEWCHAIN:
3362 if (nft_trans_chain_update(trans))
3363 nft_chain_commit_update(trans);
3364 else {
3365 trans->ctx.chain->flags &= ~NFT_CHAIN_INACTIVE;
3366 trans->ctx.table->use++;
3367 }
3368 nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
3369 nft_trans_destroy(trans);
3370 break;
3371 case NFT_MSG_DELCHAIN:
3372 trans->ctx.table->use--;
3373 nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
3374 if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
3375 trans->ctx.chain->flags & NFT_BASE_CHAIN) {
3376 nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
3377 trans->ctx.afi->nops);
3378 }
3379 break;
3380 case NFT_MSG_NEWRULE:
3381 nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
3382 nf_tables_rule_notify(&trans->ctx,
3383 nft_trans_rule(trans),
3384 NFT_MSG_NEWRULE);
3385 nft_trans_destroy(trans);
3386 break;
3387 case NFT_MSG_DELRULE:
3388 list_del_rcu(&nft_trans_rule(trans)->list);
3389 nf_tables_rule_notify(&trans->ctx,
3390 nft_trans_rule(trans),
3391 NFT_MSG_DELRULE);
3392 break;
3393 case NFT_MSG_NEWSET:
3394 nft_trans_set(trans)->flags &= ~NFT_SET_INACTIVE;
3395 nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
3396 NFT_MSG_NEWSET);
3397 nft_trans_destroy(trans);
3398 break;
3399 case NFT_MSG_DELSET:
3400 nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
3401 NFT_MSG_DELSET);
3402 break;
3403 case NFT_MSG_NEWSETELEM:
3404 nft_trans_elem_set(trans)->nelems++;
3405 nf_tables_setelem_notify(&trans->ctx,
3406 nft_trans_elem_set(trans),
3407 &nft_trans_elem(trans),
3408 NFT_MSG_NEWSETELEM, 0);
3409 nft_trans_destroy(trans);
3410 break;
3411 case NFT_MSG_DELSETELEM:
3412 nft_trans_elem_set(trans)->nelems--;
3413 nf_tables_setelem_notify(&trans->ctx,
3414 nft_trans_elem_set(trans),
3415 &nft_trans_elem(trans),
3416 NFT_MSG_DELSETELEM, 0);
3417 set = nft_trans_elem_set(trans);
3418 set->ops->get(set, &nft_trans_elem(trans));
3419 set->ops->remove(set, &nft_trans_elem(trans));
3420 nft_trans_destroy(trans);
3421 break;
3422 }
3423 }
3424
3425 list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
3426 list_del(&trans->list);
3427 trans->ctx.nla = NULL;
3428 call_rcu(&trans->rcu_head, nf_tables_commit_release_rcu);
3429 }
3430
3431 return 0;
3432}
3433
3434/* Schedule objects for release via rcu to make sure no packets are accessing
3435 * aborted rules.
3436 */
3437static void nf_tables_abort_release_rcu(struct rcu_head *rt)
3438{
3439 struct nft_trans *trans = container_of(rt, struct nft_trans, rcu_head);
3440
3441 switch (trans->msg_type) {
3442 case NFT_MSG_NEWTABLE:
3443 nf_tables_table_destroy(&trans->ctx);
3444 break;
3445 case NFT_MSG_NEWCHAIN:
3446 nf_tables_chain_destroy(trans->ctx.chain);
3447 break;
3448 case NFT_MSG_NEWRULE:
3449 nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
3450 break;
3451 case NFT_MSG_NEWSET:
3452 nft_set_destroy(nft_trans_set(trans));
3453 break;
3454 }
3455 kfree(trans);
3456}
3457
3458static int nf_tables_abort(struct sk_buff *skb)
3459{
3460 struct net *net = sock_net(skb->sk);
3461 struct nft_trans *trans, *next;
3462 struct nft_set *set;
3463
3464 list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
3465 switch (trans->msg_type) {
3466 case NFT_MSG_NEWTABLE:
3467 if (nft_trans_table_update(trans)) {
3468 if (nft_trans_table_enable(trans)) {
3469 nf_tables_table_disable(trans->ctx.afi,
3470 trans->ctx.table);
3471 trans->ctx.table->flags |= NFT_TABLE_F_DORMANT;
3472 }
3473 nft_trans_destroy(trans);
3474 } else {
3475 list_del(&trans->ctx.table->list);
3476 }
3477 break;
3478 case NFT_MSG_DELTABLE:
3479 list_add_tail(&trans->ctx.table->list,
3480 &trans->ctx.afi->tables);
3481 nft_trans_destroy(trans);
3482 break;
3483 case NFT_MSG_NEWCHAIN:
3484 if (nft_trans_chain_update(trans)) {
3485 if (nft_trans_chain_stats(trans))
3486 free_percpu(nft_trans_chain_stats(trans));
3487
3488 nft_trans_destroy(trans);
3489 } else {
3490 list_del(&trans->ctx.chain->list);
3491 if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT) &&
3492 trans->ctx.chain->flags & NFT_BASE_CHAIN) {
3493 nf_unregister_hooks(nft_base_chain(trans->ctx.chain)->ops,
3494 trans->ctx.afi->nops);
3495 }
3496 }
3497 break;
3498 case NFT_MSG_DELCHAIN:
3499 list_add_tail(&trans->ctx.chain->list,
3500 &trans->ctx.table->chains);
3501 nft_trans_destroy(trans);
3502 break;
3503 case NFT_MSG_NEWRULE:
3504 list_del_rcu(&nft_trans_rule(trans)->list);
3505 break;
3506 case NFT_MSG_DELRULE:
3507 nft_rule_clear(trans->ctx.net, nft_trans_rule(trans));
3508 nft_trans_destroy(trans);
3509 break;
3510 case NFT_MSG_NEWSET:
3511 list_del(&nft_trans_set(trans)->list);
3512 break;
3513 case NFT_MSG_DELSET:
3514 list_add_tail(&nft_trans_set(trans)->list,
3515 &trans->ctx.table->sets);
3516 nft_trans_destroy(trans);
3517 break;
3518 case NFT_MSG_NEWSETELEM:
3519 set = nft_trans_elem_set(trans);
3520 set->ops->get(set, &nft_trans_elem(trans));
3521 set->ops->remove(set, &nft_trans_elem(trans));
3522 nft_trans_destroy(trans);
3523 break;
3524 case NFT_MSG_DELSETELEM:
3525 nft_trans_destroy(trans);
3526 break;
3527 }
3528 }
3529
3530 list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
3531 list_del(&trans->list);
3532 trans->ctx.nla = NULL;
3533 call_rcu(&trans->rcu_head, nf_tables_abort_release_rcu);
3534 }
3535
3536 return 0;
3537}
3538
2987static const struct nfnetlink_subsystem nf_tables_subsys = { 3539static const struct nfnetlink_subsystem nf_tables_subsys = {
2988 .name = "nf_tables", 3540 .name = "nf_tables",
2989 .subsys_id = NFNL_SUBSYS_NFTABLES, 3541 .subsys_id = NFNL_SUBSYS_NFTABLES,
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index bd0d41e69341..cc5603016242 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -215,22 +215,14 @@ static void nft_ct_l3proto_module_put(uint8_t family)
215 nf_ct_l3proto_module_put(family); 215 nf_ct_l3proto_module_put(family);
216} 216}
217 217
218static int nft_ct_init_validate_get(const struct nft_expr *expr, 218static int nft_ct_get_init(const struct nft_ctx *ctx,
219 const struct nlattr * const tb[]) 219 const struct nft_expr *expr,
220 const struct nlattr * const tb[])
220{ 221{
221 struct nft_ct *priv = nft_expr_priv(expr); 222 struct nft_ct *priv = nft_expr_priv(expr);
223 int err;
222 224
223 if (tb[NFTA_CT_DIRECTION] != NULL) { 225 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
224 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
225 switch (priv->dir) {
226 case IP_CT_DIR_ORIGINAL:
227 case IP_CT_DIR_REPLY:
228 break;
229 default:
230 return -EINVAL;
231 }
232 }
233
234 switch (priv->key) { 226 switch (priv->key) {
235 case NFT_CT_STATE: 227 case NFT_CT_STATE:
236 case NFT_CT_DIRECTION: 228 case NFT_CT_DIRECTION:
@@ -262,55 +254,55 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
262 return -EOPNOTSUPP; 254 return -EOPNOTSUPP;
263 } 255 }
264 256
265 return 0; 257 if (tb[NFTA_CT_DIRECTION] != NULL) {
266} 258 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
267 259 switch (priv->dir) {
268static int nft_ct_init_validate_set(uint32_t key) 260 case IP_CT_DIR_ORIGINAL:
269{ 261 case IP_CT_DIR_REPLY:
270 switch (key) { 262 break;
271 case NFT_CT_MARK: 263 default:
272 break; 264 return -EINVAL;
273 default: 265 }
274 return -EOPNOTSUPP;
275 } 266 }
276 267
268 priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
269 err = nft_validate_output_register(priv->dreg);
270 if (err < 0)
271 return err;
272
273 err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
274 if (err < 0)
275 return err;
276
277 err = nft_ct_l3proto_try_module_get(ctx->afi->family);
278 if (err < 0)
279 return err;
280
277 return 0; 281 return 0;
278} 282}
279 283
280static int nft_ct_init(const struct nft_ctx *ctx, 284static int nft_ct_set_init(const struct nft_ctx *ctx,
281 const struct nft_expr *expr, 285 const struct nft_expr *expr,
282 const struct nlattr * const tb[]) 286 const struct nlattr * const tb[])
283{ 287{
284 struct nft_ct *priv = nft_expr_priv(expr); 288 struct nft_ct *priv = nft_expr_priv(expr);
285 int err; 289 int err;
286 290
287 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); 291 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
288 292 switch (priv->key) {
289 if (tb[NFTA_CT_DREG]) { 293#ifdef CONFIG_NF_CONNTRACK_MARK
290 err = nft_ct_init_validate_get(expr, tb); 294 case NFT_CT_MARK:
291 if (err < 0) 295 break;
292 return err; 296#endif
293 297 default:
294 priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); 298 return -EOPNOTSUPP;
295 err = nft_validate_output_register(priv->dreg);
296 if (err < 0)
297 return err;
298
299 err = nft_validate_data_load(ctx, priv->dreg, NULL,
300 NFT_DATA_VALUE);
301 if (err < 0)
302 return err;
303 } else {
304 err = nft_ct_init_validate_set(priv->key);
305 if (err < 0)
306 return err;
307
308 priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
309 err = nft_validate_input_register(priv->sreg);
310 if (err < 0)
311 return err;
312 } 299 }
313 300
301 priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG]));
302 err = nft_validate_input_register(priv->sreg);
303 if (err < 0)
304 return err;
305
314 err = nft_ct_l3proto_try_module_get(ctx->afi->family); 306 err = nft_ct_l3proto_try_module_get(ctx->afi->family);
315 if (err < 0) 307 if (err < 0)
316 return err; 308 return err;
@@ -370,7 +362,7 @@ static const struct nft_expr_ops nft_ct_get_ops = {
370 .type = &nft_ct_type, 362 .type = &nft_ct_type,
371 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), 363 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
372 .eval = nft_ct_get_eval, 364 .eval = nft_ct_get_eval,
373 .init = nft_ct_init, 365 .init = nft_ct_get_init,
374 .destroy = nft_ct_destroy, 366 .destroy = nft_ct_destroy,
375 .dump = nft_ct_get_dump, 367 .dump = nft_ct_get_dump,
376}; 368};
@@ -379,7 +371,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
379 .type = &nft_ct_type, 371 .type = &nft_ct_type,
380 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), 372 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
381 .eval = nft_ct_set_eval, 373 .eval = nft_ct_set_eval,
382 .init = nft_ct_init, 374 .init = nft_ct_set_init,
383 .destroy = nft_ct_destroy, 375 .destroy = nft_ct_destroy,
384 .dump = nft_ct_set_dump, 376 .dump = nft_ct_set_dump,
385}; 377};
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 3b1ad876d6b0..1dfeb6786832 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -12,6 +12,7 @@
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/list.h> 14#include <linux/list.h>
15#include <linux/log2.h>
15#include <linux/jhash.h> 16#include <linux/jhash.h>
16#include <linux/netlink.h> 17#include <linux/netlink.h>
17#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
@@ -19,7 +20,7 @@
19#include <linux/netfilter/nf_tables.h> 20#include <linux/netfilter/nf_tables.h>
20#include <net/netfilter/nf_tables.h> 21#include <net/netfilter/nf_tables.h>
21 22
22#define NFT_HASH_MIN_SIZE 4 23#define NFT_HASH_MIN_SIZE 4UL
23 24
24struct nft_hash { 25struct nft_hash {
25 struct nft_hash_table __rcu *tbl; 26 struct nft_hash_table __rcu *tbl;
@@ -27,7 +28,6 @@ struct nft_hash {
27 28
28struct nft_hash_table { 29struct nft_hash_table {
29 unsigned int size; 30 unsigned int size;
30 unsigned int elements;
31 struct nft_hash_elem __rcu *buckets[]; 31 struct nft_hash_elem __rcu *buckets[];
32}; 32};
33 33
@@ -82,6 +82,11 @@ static void nft_hash_tbl_free(const struct nft_hash_table *tbl)
82 kfree(tbl); 82 kfree(tbl);
83} 83}
84 84
85static unsigned int nft_hash_tbl_size(unsigned int nelem)
86{
87 return max(roundup_pow_of_two(nelem * 4 / 3), NFT_HASH_MIN_SIZE);
88}
89
85static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets) 90static struct nft_hash_table *nft_hash_tbl_alloc(unsigned int nbuckets)
86{ 91{
87 struct nft_hash_table *tbl; 92 struct nft_hash_table *tbl;
@@ -161,7 +166,6 @@ static int nft_hash_tbl_expand(const struct nft_set *set, struct nft_hash *priv)
161 break; 166 break;
162 } 167 }
163 } 168 }
164 ntbl->elements = tbl->elements;
165 169
166 /* Publish new table */ 170 /* Publish new table */
167 rcu_assign_pointer(priv->tbl, ntbl); 171 rcu_assign_pointer(priv->tbl, ntbl);
@@ -201,7 +205,6 @@ static int nft_hash_tbl_shrink(const struct nft_set *set, struct nft_hash *priv)
201 ; 205 ;
202 RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); 206 RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]);
203 } 207 }
204 ntbl->elements = tbl->elements;
205 208
206 /* Publish new table */ 209 /* Publish new table */
207 rcu_assign_pointer(priv->tbl, ntbl); 210 rcu_assign_pointer(priv->tbl, ntbl);
@@ -237,10 +240,9 @@ static int nft_hash_insert(const struct nft_set *set,
237 h = nft_hash_data(&he->key, tbl->size, set->klen); 240 h = nft_hash_data(&he->key, tbl->size, set->klen);
238 RCU_INIT_POINTER(he->next, tbl->buckets[h]); 241 RCU_INIT_POINTER(he->next, tbl->buckets[h]);
239 rcu_assign_pointer(tbl->buckets[h], he); 242 rcu_assign_pointer(tbl->buckets[h], he);
240 tbl->elements++;
241 243
242 /* Expand table when exceeding 75% load */ 244 /* Expand table when exceeding 75% load */
243 if (tbl->elements > tbl->size / 4 * 3) 245 if (set->nelems + 1 > tbl->size / 4 * 3)
244 nft_hash_tbl_expand(set, priv); 246 nft_hash_tbl_expand(set, priv);
245 247
246 return 0; 248 return 0;
@@ -268,10 +270,9 @@ static void nft_hash_remove(const struct nft_set *set,
268 RCU_INIT_POINTER(*pprev, he->next); 270 RCU_INIT_POINTER(*pprev, he->next);
269 synchronize_rcu(); 271 synchronize_rcu();
270 kfree(he); 272 kfree(he);
271 tbl->elements--;
272 273
273 /* Shrink table beneath 30% load */ 274 /* Shrink table beneath 30% load */
274 if (tbl->elements < tbl->size * 3 / 10 && 275 if (set->nelems - 1 < tbl->size * 3 / 10 &&
275 tbl->size > NFT_HASH_MIN_SIZE) 276 tbl->size > NFT_HASH_MIN_SIZE)
276 nft_hash_tbl_shrink(set, priv); 277 nft_hash_tbl_shrink(set, priv);
277} 278}
@@ -335,17 +336,23 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
335} 336}
336 337
337static int nft_hash_init(const struct nft_set *set, 338static int nft_hash_init(const struct nft_set *set,
339 const struct nft_set_desc *desc,
338 const struct nlattr * const tb[]) 340 const struct nlattr * const tb[])
339{ 341{
340 struct nft_hash *priv = nft_set_priv(set); 342 struct nft_hash *priv = nft_set_priv(set);
341 struct nft_hash_table *tbl; 343 struct nft_hash_table *tbl;
344 unsigned int size;
342 345
343 if (unlikely(!nft_hash_rnd_initted)) { 346 if (unlikely(!nft_hash_rnd_initted)) {
344 get_random_bytes(&nft_hash_rnd, 4); 347 get_random_bytes(&nft_hash_rnd, 4);
345 nft_hash_rnd_initted = true; 348 nft_hash_rnd_initted = true;
346 } 349 }
347 350
348 tbl = nft_hash_tbl_alloc(NFT_HASH_MIN_SIZE); 351 size = NFT_HASH_MIN_SIZE;
352 if (desc->size)
353 size = nft_hash_tbl_size(desc->size);
354
355 tbl = nft_hash_tbl_alloc(size);
349 if (tbl == NULL) 356 if (tbl == NULL)
350 return -ENOMEM; 357 return -ENOMEM;
351 RCU_INIT_POINTER(priv->tbl, tbl); 358 RCU_INIT_POINTER(priv->tbl, tbl);
@@ -369,8 +376,37 @@ static void nft_hash_destroy(const struct nft_set *set)
369 kfree(tbl); 376 kfree(tbl);
370} 377}
371 378
379static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
380 struct nft_set_estimate *est)
381{
382 unsigned int esize;
383
384 esize = sizeof(struct nft_hash_elem);
385 if (features & NFT_SET_MAP)
386 esize += FIELD_SIZEOF(struct nft_hash_elem, data[0]);
387
388 if (desc->size) {
389 est->size = sizeof(struct nft_hash) +
390 nft_hash_tbl_size(desc->size) *
391 sizeof(struct nft_hash_elem *) +
392 desc->size * esize;
393 } else {
394 /* Resizing happens when the load drops below 30% or goes
395 * above 75%. The average of 52.5% load (approximated by 50%)
396 * is used for the size estimation of the hash buckets,
397 * meaning we calculate two buckets per element.
398 */
399 est->size = esize + 2 * sizeof(struct nft_hash_elem *);
400 }
401
402 est->class = NFT_SET_CLASS_O_1;
403
404 return true;
405}
406
372static struct nft_set_ops nft_hash_ops __read_mostly = { 407static struct nft_set_ops nft_hash_ops __read_mostly = {
373 .privsize = nft_hash_privsize, 408 .privsize = nft_hash_privsize,
409 .estimate = nft_hash_estimate,
374 .init = nft_hash_init, 410 .init = nft_hash_init,
375 .destroy = nft_hash_destroy, 411 .destroy = nft_hash_destroy,
376 .get = nft_hash_get, 412 .get = nft_hash_get,
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 7fd2bea8aa23..6404a726d17b 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -56,8 +56,14 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
56 return -EINVAL; 56 return -EINVAL;
57 57
58 set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]); 58 set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
59 if (IS_ERR(set)) 59 if (IS_ERR(set)) {
60 return PTR_ERR(set); 60 if (tb[NFTA_LOOKUP_SET_ID]) {
61 set = nf_tables_set_lookup_byid(ctx->net,
62 tb[NFTA_LOOKUP_SET_ID]);
63 }
64 if (IS_ERR(set))
65 return PTR_ERR(set);
66 }
61 67
62 priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); 68 priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
63 err = nft_validate_input_register(priv->sreg); 69 err = nft_validate_input_register(priv->sreg);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 425cf39af890..852b178c6ae7 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,18 +18,11 @@
18#include <net/sock.h> 18#include <net/sock.h>
19#include <net/tcp_states.h> /* for TCP_TIME_WAIT */ 19#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
20#include <net/netfilter/nf_tables.h> 20#include <net/netfilter/nf_tables.h>
21#include <net/netfilter/nft_meta.h>
21 22
22struct nft_meta { 23void nft_meta_get_eval(const struct nft_expr *expr,
23 enum nft_meta_keys key:8; 24 struct nft_data data[NFT_REG_MAX + 1],
24 union { 25 const struct nft_pktinfo *pkt)
25 enum nft_registers dreg:8;
26 enum nft_registers sreg:8;
27 };
28};
29
30static void nft_meta_get_eval(const struct nft_expr *expr,
31 struct nft_data data[NFT_REG_MAX + 1],
32 const struct nft_pktinfo *pkt)
33{ 26{
34 const struct nft_meta *priv = nft_expr_priv(expr); 27 const struct nft_meta *priv = nft_expr_priv(expr);
35 const struct sk_buff *skb = pkt->skb; 28 const struct sk_buff *skb = pkt->skb;
@@ -140,10 +133,11 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
140err: 133err:
141 data[NFT_REG_VERDICT].verdict = NFT_BREAK; 134 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
142} 135}
136EXPORT_SYMBOL_GPL(nft_meta_get_eval);
143 137
144static void nft_meta_set_eval(const struct nft_expr *expr, 138void nft_meta_set_eval(const struct nft_expr *expr,
145 struct nft_data data[NFT_REG_MAX + 1], 139 struct nft_data data[NFT_REG_MAX + 1],
146 const struct nft_pktinfo *pkt) 140 const struct nft_pktinfo *pkt)
147{ 141{
148 const struct nft_meta *meta = nft_expr_priv(expr); 142 const struct nft_meta *meta = nft_expr_priv(expr);
149 struct sk_buff *skb = pkt->skb; 143 struct sk_buff *skb = pkt->skb;
@@ -163,28 +157,24 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
163 WARN_ON(1); 157 WARN_ON(1);
164 } 158 }
165} 159}
160EXPORT_SYMBOL_GPL(nft_meta_set_eval);
166 161
167static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { 162const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
168 [NFTA_META_DREG] = { .type = NLA_U32 }, 163 [NFTA_META_DREG] = { .type = NLA_U32 },
169 [NFTA_META_KEY] = { .type = NLA_U32 }, 164 [NFTA_META_KEY] = { .type = NLA_U32 },
170 [NFTA_META_SREG] = { .type = NLA_U32 }, 165 [NFTA_META_SREG] = { .type = NLA_U32 },
171}; 166};
167EXPORT_SYMBOL_GPL(nft_meta_policy);
172 168
173static int nft_meta_init_validate_set(uint32_t key) 169int nft_meta_get_init(const struct nft_ctx *ctx,
170 const struct nft_expr *expr,
171 const struct nlattr * const tb[])
174{ 172{
175 switch (key) { 173 struct nft_meta *priv = nft_expr_priv(expr);
176 case NFT_META_MARK: 174 int err;
177 case NFT_META_PRIORITY:
178 case NFT_META_NFTRACE:
179 return 0;
180 default:
181 return -EOPNOTSUPP;
182 }
183}
184 175
185static int nft_meta_init_validate_get(uint32_t key) 176 priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
186{ 177 switch (priv->key) {
187 switch (key) {
188 case NFT_META_LEN: 178 case NFT_META_LEN:
189 case NFT_META_PROTOCOL: 179 case NFT_META_PROTOCOL:
190 case NFT_META_NFPROTO: 180 case NFT_META_NFPROTO:
@@ -205,39 +195,41 @@ static int nft_meta_init_validate_get(uint32_t key)
205#ifdef CONFIG_NETWORK_SECMARK 195#ifdef CONFIG_NETWORK_SECMARK
206 case NFT_META_SECMARK: 196 case NFT_META_SECMARK:
207#endif 197#endif
208 return 0; 198 break;
209 default: 199 default:
210 return -EOPNOTSUPP; 200 return -EOPNOTSUPP;
211 } 201 }
212 202
203 priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
204 err = nft_validate_output_register(priv->dreg);
205 if (err < 0)
206 return err;
207
208 err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
209 if (err < 0)
210 return err;
211
212 return 0;
213} 213}
214EXPORT_SYMBOL_GPL(nft_meta_get_init);
214 215
215static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr, 216int nft_meta_set_init(const struct nft_ctx *ctx,
216 const struct nlattr * const tb[]) 217 const struct nft_expr *expr,
218 const struct nlattr * const tb[])
217{ 219{
218 struct nft_meta *priv = nft_expr_priv(expr); 220 struct nft_meta *priv = nft_expr_priv(expr);
219 int err; 221 int err;
220 222
221 priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); 223 priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
222 224 switch (priv->key) {
223 if (tb[NFTA_META_DREG]) { 225 case NFT_META_MARK:
224 err = nft_meta_init_validate_get(priv->key); 226 case NFT_META_PRIORITY:
225 if (err < 0) 227 case NFT_META_NFTRACE:
226 return err; 228 break;
227 229 default:
228 priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); 230 return -EOPNOTSUPP;
229 err = nft_validate_output_register(priv->dreg);
230 if (err < 0)
231 return err;
232
233 return nft_validate_data_load(ctx, priv->dreg, NULL,
234 NFT_DATA_VALUE);
235 } 231 }
236 232
237 err = nft_meta_init_validate_set(priv->key);
238 if (err < 0)
239 return err;
240
241 priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); 233 priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG]));
242 err = nft_validate_input_register(priv->sreg); 234 err = nft_validate_input_register(priv->sreg);
243 if (err < 0) 235 if (err < 0)
@@ -245,9 +237,10 @@ static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
245 237
246 return 0; 238 return 0;
247} 239}
240EXPORT_SYMBOL_GPL(nft_meta_set_init);
248 241
249static int nft_meta_get_dump(struct sk_buff *skb, 242int nft_meta_get_dump(struct sk_buff *skb,
250 const struct nft_expr *expr) 243 const struct nft_expr *expr)
251{ 244{
252 const struct nft_meta *priv = nft_expr_priv(expr); 245 const struct nft_meta *priv = nft_expr_priv(expr);
253 246
@@ -260,9 +253,10 @@ static int nft_meta_get_dump(struct sk_buff *skb,
260nla_put_failure: 253nla_put_failure:
261 return -1; 254 return -1;
262} 255}
256EXPORT_SYMBOL_GPL(nft_meta_get_dump);
263 257
264static int nft_meta_set_dump(struct sk_buff *skb, 258int nft_meta_set_dump(struct sk_buff *skb,
265 const struct nft_expr *expr) 259 const struct nft_expr *expr)
266{ 260{
267 const struct nft_meta *priv = nft_expr_priv(expr); 261 const struct nft_meta *priv = nft_expr_priv(expr);
268 262
@@ -276,13 +270,14 @@ static int nft_meta_set_dump(struct sk_buff *skb,
276nla_put_failure: 270nla_put_failure:
277 return -1; 271 return -1;
278} 272}
273EXPORT_SYMBOL_GPL(nft_meta_set_dump);
279 274
280static struct nft_expr_type nft_meta_type; 275static struct nft_expr_type nft_meta_type;
281static const struct nft_expr_ops nft_meta_get_ops = { 276static const struct nft_expr_ops nft_meta_get_ops = {
282 .type = &nft_meta_type, 277 .type = &nft_meta_type,
283 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), 278 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
284 .eval = nft_meta_get_eval, 279 .eval = nft_meta_get_eval,
285 .init = nft_meta_init, 280 .init = nft_meta_get_init,
286 .dump = nft_meta_get_dump, 281 .dump = nft_meta_get_dump,
287}; 282};
288 283
@@ -290,7 +285,7 @@ static const struct nft_expr_ops nft_meta_set_ops = {
290 .type = &nft_meta_type, 285 .type = &nft_meta_type,
291 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), 286 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
292 .eval = nft_meta_set_eval, 287 .eval = nft_meta_set_eval,
293 .init = nft_meta_init, 288 .init = nft_meta_set_init,
294 .dump = nft_meta_set_dump, 289 .dump = nft_meta_set_dump,
295}; 290};
296 291
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index e21d69d13506..072e611e9f71 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -201,6 +201,7 @@ static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
201} 201}
202 202
203static int nft_rbtree_init(const struct nft_set *set, 203static int nft_rbtree_init(const struct nft_set *set,
204 const struct nft_set_desc *desc,
204 const struct nlattr * const nla[]) 205 const struct nlattr * const nla[])
205{ 206{
206 struct nft_rbtree *priv = nft_set_priv(set); 207 struct nft_rbtree *priv = nft_set_priv(set);
@@ -222,8 +223,28 @@ static void nft_rbtree_destroy(const struct nft_set *set)
222 } 223 }
223} 224}
224 225
226static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
227 struct nft_set_estimate *est)
228{
229 unsigned int nsize;
230
231 nsize = sizeof(struct nft_rbtree_elem);
232 if (features & NFT_SET_MAP)
233 nsize += FIELD_SIZEOF(struct nft_rbtree_elem, data[0]);
234
235 if (desc->size)
236 est->size = sizeof(struct nft_rbtree) + desc->size * nsize;
237 else
238 est->size = nsize;
239
240 est->class = NFT_SET_CLASS_O_LOG_N;
241
242 return true;
243}
244
225static struct nft_set_ops nft_rbtree_ops __read_mostly = { 245static struct nft_set_ops nft_rbtree_ops __read_mostly = {
226 .privsize = nft_rbtree_privsize, 246 .privsize = nft_rbtree_privsize,
247 .estimate = nft_rbtree_estimate,
227 .init = nft_rbtree_init, 248 .init = nft_rbtree_init,
228 .destroy = nft_rbtree_destroy, 249 .destroy = nft_rbtree_destroy,
229 .insert = nft_rbtree_insert, 250 .insert = nft_rbtree_insert,