aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Neira Ayuso <pablo@netfilter.org>2013-10-14 05:05:33 -0400
committerPablo Neira Ayuso <pablo@netfilter.org>2013-10-14 12:01:01 -0400
commit0628b123c96d126e617beb3b4fd63b874d0e4f17 (patch)
tree78726af17d726e96d2bf9b28bf6a6d16222c2049
parent5e94846686d027a4c8ecc5d9d52b18036d3e8f7a (diff)
netfilter: nfnetlink: add batch support and use it from nf_tables
This patch adds batch support to nfnetlink. Basically, it adds two new control messages:

* NFNL_MSG_BATCH_BEGIN, that indicates the beginning of a batch, the nfgenmsg->res_id indicates the nfnetlink subsystem ID.
* NFNL_MSG_BATCH_END, that results in the invocation of the ss->commit callback function. If not specified or an error occurred in the batch, the ss->abort function is invoked instead.

The end message represents the commit operation in nftables, the lack of end message results in an abort. This patch also adds the .call_batch function that is only called from the batch receive path.

This patch adds atomic rule updates and dumps based on bitmask generations. This allows atomically committing a set of rule-set updates incrementally without altering the internal state of existing nf_tables expressions/matches/targets.

The idea consists of using a generation cursor of 1 bit and a bitmask of 2 bits per rule. Assuming the gencursor is 0, then the genmask (expressed as a bitmask) can be interpreted as:

00 active in the present, will be active in the next generation.
01 inactive in the present, will be active in the next generation.
10 active in the present, will be deleted in the next generation.
 ^
 gencursor

Once you invoke the transition to the next generation, the global gencursor is updated:

00 active in the present, will be active in the next generation.
01 active in the present, needs to zero its future, it becomes 00.
10 inactive in the present, delete now.
^
gencursor

If a dump is in progress and nf_tables enters a new generation, the dump will stop and return -EBUSY to let userspace know that it has to retry. In order to invalidate dumps, a global genctr counter is increased every time nf_tables enters a new generation.

This new operation can be used from the user-space utility that controls the firewall, e.g.

nft -f restore

The rule updates contained in `file' will be applied atomically.
cat file
-----
add filter INPUT ip saddr 1.1.1.1 counter accept #1
del filter INPUT ip daddr 2.2.2.2 counter drop #2
-EOF-

Note that the rule 1 will be inactive until the transition to the next generation, the rule 2 will be evicted in the next generation.

There is a penalty during the rule update due to the branch misprediction in the packet matching framework. But that should be quickly resolved once the iteration over the commit list that contains rules that require updates is finished.

Event notification happens once the rule-set update has been committed. So we skip notifications in case the rule-set update is aborted, which can happen in case that the rule-set is tested to apply correctly.

This patch squashed the following patches from Pablo:

* nf_tables: atomic rule updates and dumps
* nf_tables: get rid of per rule list_head for commits
* nf_tables: use per netns commit list
* nfnetlink: add batch support and use it from nf_tables
* nf_tables: all rule updates are transactional
* nf_tables: attach replacement rule after stale one
* nf_tables: do not allow deletion/replacement of stale rules
* nf_tables: remove unused NFTA_RULE_FLAGS

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
-rw-r--r--include/linux/netfilter/nfnetlink.h5
-rw-r--r--include/net/netfilter/nf_tables.h25
-rw-r--r--include/net/netns/nftables.h3
-rw-r--r--include/uapi/linux/netfilter/nfnetlink.h4
-rw-r--r--net/netfilter/nf_tables_api.c202
-rw-r--r--net/netfilter/nf_tables_core.c10
-rw-r--r--net/netfilter/nfnetlink.c175
7 files changed, 401 insertions, 23 deletions
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 4f68cd7141d2..28c74367e900 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -14,6 +14,9 @@ struct nfnl_callback {
14 int (*call_rcu)(struct sock *nl, struct sk_buff *skb, 14 int (*call_rcu)(struct sock *nl, struct sk_buff *skb,
15 const struct nlmsghdr *nlh, 15 const struct nlmsghdr *nlh,
16 const struct nlattr * const cda[]); 16 const struct nlattr * const cda[]);
17 int (*call_batch)(struct sock *nl, struct sk_buff *skb,
18 const struct nlmsghdr *nlh,
19 const struct nlattr * const cda[]);
17 const struct nla_policy *policy; /* netlink attribute policy */ 20 const struct nla_policy *policy; /* netlink attribute policy */
18 const u_int16_t attr_count; /* number of nlattr's */ 21 const u_int16_t attr_count; /* number of nlattr's */
19}; 22};
@@ -23,6 +26,8 @@ struct nfnetlink_subsystem {
23 __u8 subsys_id; /* nfnetlink subsystem ID */ 26 __u8 subsys_id; /* nfnetlink subsystem ID */
24 __u8 cb_count; /* number of callbacks */ 27 __u8 cb_count; /* number of callbacks */
25 const struct nfnl_callback *cb; /* callback for individual types */ 28 const struct nfnl_callback *cb; /* callback for individual types */
29 int (*commit)(struct sk_buff *skb);
30 int (*abort)(struct sk_buff *skb);
26}; 31};
27 32
28int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); 33int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index d3272e943aac..975ad3c573c7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -323,18 +323,39 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
323 * @list: used internally 323 * @list: used internally
324 * @rcu_head: used internally for rcu 324 * @rcu_head: used internally for rcu
325 * @handle: rule handle 325 * @handle: rule handle
326 * @genmask: generation mask
326 * @dlen: length of expression data 327 * @dlen: length of expression data
327 * @data: expression data 328 * @data: expression data
328 */ 329 */
329struct nft_rule { 330struct nft_rule {
330 struct list_head list; 331 struct list_head list;
331 struct rcu_head rcu_head; 332 struct rcu_head rcu_head;
332 u64 handle:48, 333 u64 handle:46,
334 genmask:2,
333 dlen:16; 335 dlen:16;
334 unsigned char data[] 336 unsigned char data[]
335 __attribute__((aligned(__alignof__(struct nft_expr)))); 337 __attribute__((aligned(__alignof__(struct nft_expr))));
336}; 338};
337 339
340/**
341 * struct nft_rule_trans - nf_tables rule update in transaction
342 *
343 * @list: used internally
344 * @rule: rule that needs to be updated
345 * @chain: chain that this rule belongs to
346 * @table: table for which this chain applies
347 * @nlh: netlink header of the message that contains this update
348 * @family: family expressed as AF_*
349 */
350struct nft_rule_trans {
351 struct list_head list;
352 struct nft_rule *rule;
353 const struct nft_chain *chain;
354 const struct nft_table *table;
355 const struct nlmsghdr *nlh;
356 u8 family;
357};
358
338static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) 359static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
339{ 360{
340 return (struct nft_expr *)&rule->data[0]; 361 return (struct nft_expr *)&rule->data[0];
@@ -370,6 +391,7 @@ enum nft_chain_flags {
370 * @rules: list of rules in the chain 391 * @rules: list of rules in the chain
371 * @list: used internally 392 * @list: used internally
372 * @rcu_head: used internally 393 * @rcu_head: used internally
394 * @net: net namespace that this chain belongs to
373 * @handle: chain handle 395 * @handle: chain handle
374 * @flags: bitmask of enum nft_chain_flags 396 * @flags: bitmask of enum nft_chain_flags
375 * @use: number of jump references to this chain 397 * @use: number of jump references to this chain
@@ -380,6 +402,7 @@ struct nft_chain {
380 struct list_head rules; 402 struct list_head rules;
381 struct list_head list; 403 struct list_head list;
382 struct rcu_head rcu_head; 404 struct rcu_head rcu_head;
405 struct net *net;
383 u64 handle; 406 u64 handle;
384 u8 flags; 407 u8 flags;
385 u16 use; 408 u16 use;
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index a98b1c5d9913..08a4248a12b5 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,9 +7,12 @@ struct nft_af_info;
7 7
8struct netns_nftables { 8struct netns_nftables {
9 struct list_head af_info; 9 struct list_head af_info;
10 struct list_head commit_list;
10 struct nft_af_info *ipv4; 11 struct nft_af_info *ipv4;
11 struct nft_af_info *ipv6; 12 struct nft_af_info *ipv6;
12 struct nft_af_info *bridge; 13 struct nft_af_info *bridge;
14 u8 gencursor;
15 u8 genctr;
13}; 16};
14 17
15#endif 18#endif
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 288959404d54..596ddd45253c 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -57,4 +57,8 @@ struct nfgenmsg {
57#define NFNL_SUBSYS_NFT_COMPAT 11 57#define NFNL_SUBSYS_NFT_COMPAT 11
58#define NFNL_SUBSYS_COUNT 12 58#define NFNL_SUBSYS_COUNT 12
59 59
60/* Reserved control nfnetlink messages */
61#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE
62#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1
63
60#endif /* _UAPI_NFNETLINK_H */ 64#endif /* _UAPI_NFNETLINK_H */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 0f140663ec71..79e1418a6043 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -978,6 +978,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
978 978
979 INIT_LIST_HEAD(&chain->rules); 979 INIT_LIST_HEAD(&chain->rules);
980 chain->handle = nf_tables_alloc_handle(table); 980 chain->handle = nf_tables_alloc_handle(table);
981 chain->net = net;
981 nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); 982 nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
982 983
983 if (!(table->flags & NFT_TABLE_F_DORMANT) && 984 if (!(table->flags & NFT_TABLE_F_DORMANT) &&
@@ -1371,6 +1372,41 @@ err:
1371 return err; 1372 return err;
1372} 1373}
1373 1374
1375static inline bool
1376nft_rule_is_active(struct net *net, const struct nft_rule *rule)
1377{
1378 return (rule->genmask & (1 << net->nft.gencursor)) == 0;
1379}
1380
1381static inline int gencursor_next(struct net *net)
1382{
1383 return net->nft.gencursor+1 == 1 ? 1 : 0;
1384}
1385
1386static inline int
1387nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
1388{
1389 return (rule->genmask & (1 << gencursor_next(net))) == 0;
1390}
1391
1392static inline void
1393nft_rule_activate_next(struct net *net, struct nft_rule *rule)
1394{
1395 /* Now inactive, will be active in the future */
1396 rule->genmask = (1 << net->nft.gencursor);
1397}
1398
1399static inline void
1400nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
1401{
1402 rule->genmask = (1 << gencursor_next(net));
1403}
1404
1405static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
1406{
1407 rule->genmask = 0;
1408}
1409
1374static int nf_tables_dump_rules(struct sk_buff *skb, 1410static int nf_tables_dump_rules(struct sk_buff *skb,
1375 struct netlink_callback *cb) 1411 struct netlink_callback *cb)
1376{ 1412{
@@ -1382,6 +1418,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
1382 unsigned int idx = 0, s_idx = cb->args[0]; 1418 unsigned int idx = 0, s_idx = cb->args[0];
1383 struct net *net = sock_net(skb->sk); 1419 struct net *net = sock_net(skb->sk);
1384 int family = nfmsg->nfgen_family; 1420 int family = nfmsg->nfgen_family;
1421 u8 genctr = ACCESS_ONCE(net->nft.genctr);
1422 u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
1385 1423
1386 list_for_each_entry(afi, &net->nft.af_info, list) { 1424 list_for_each_entry(afi, &net->nft.af_info, list) {
1387 if (family != NFPROTO_UNSPEC && family != afi->family) 1425 if (family != NFPROTO_UNSPEC && family != afi->family)
@@ -1390,6 +1428,8 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
1390 list_for_each_entry(table, &afi->tables, list) { 1428 list_for_each_entry(table, &afi->tables, list) {
1391 list_for_each_entry(chain, &table->chains, list) { 1429 list_for_each_entry(chain, &table->chains, list) {
1392 list_for_each_entry(rule, &chain->rules, list) { 1430 list_for_each_entry(rule, &chain->rules, list) {
1431 if (!nft_rule_is_active(net, rule))
1432 goto cont;
1393 if (idx < s_idx) 1433 if (idx < s_idx)
1394 goto cont; 1434 goto cont;
1395 if (idx > s_idx) 1435 if (idx > s_idx)
@@ -1408,6 +1448,10 @@ cont:
1408 } 1448 }
1409 } 1449 }
1410done: 1450done:
1451 /* Invalidate this dump, a transition to the new generation happened */
1452 if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
1453 return -EBUSY;
1454
1411 cb->args[0] = idx; 1455 cb->args[0] = idx;
1412 return skb->len; 1456 return skb->len;
1413} 1457}
@@ -1492,6 +1536,25 @@ static void nf_tables_rule_destroy(struct nft_rule *rule)
1492 1536
1493static struct nft_expr_info *info; 1537static struct nft_expr_info *info;
1494 1538
1539static struct nft_rule_trans *
1540nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
1541{
1542 struct nft_rule_trans *rupd;
1543
1544 rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
1545 if (rupd == NULL)
1546 return NULL;
1547
1548 rupd->chain = ctx->chain;
1549 rupd->table = ctx->table;
1550 rupd->rule = rule;
1551 rupd->family = ctx->afi->family;
1552 rupd->nlh = ctx->nlh;
1553 list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
1554
1555 return rupd;
1556}
1557
1495static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb, 1558static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1496 const struct nlmsghdr *nlh, 1559 const struct nlmsghdr *nlh,
1497 const struct nlattr * const nla[]) 1560 const struct nlattr * const nla[])
@@ -1502,6 +1565,7 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1502 struct nft_table *table; 1565 struct nft_table *table;
1503 struct nft_chain *chain; 1566 struct nft_chain *chain;
1504 struct nft_rule *rule, *old_rule = NULL; 1567 struct nft_rule *rule, *old_rule = NULL;
1568 struct nft_rule_trans *repl = NULL;
1505 struct nft_expr *expr; 1569 struct nft_expr *expr;
1506 struct nft_ctx ctx; 1570 struct nft_ctx ctx;
1507 struct nlattr *tmp; 1571 struct nlattr *tmp;
@@ -1576,6 +1640,8 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1576 if (rule == NULL) 1640 if (rule == NULL)
1577 goto err1; 1641 goto err1;
1578 1642
1643 nft_rule_activate_next(net, rule);
1644
1579 rule->handle = handle; 1645 rule->handle = handle;
1580 rule->dlen = size; 1646 rule->dlen = size;
1581 1647
@@ -1589,8 +1655,18 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1589 } 1655 }
1590 1656
1591 if (nlh->nlmsg_flags & NLM_F_REPLACE) { 1657 if (nlh->nlmsg_flags & NLM_F_REPLACE) {
1592 list_replace_rcu(&old_rule->list, &rule->list); 1658 if (nft_rule_is_active_next(net, old_rule)) {
1593 nf_tables_rule_destroy(old_rule); 1659 repl = nf_tables_trans_add(old_rule, &ctx);
1660 if (repl == NULL) {
1661 err = -ENOMEM;
1662 goto err2;
1663 }
1664 nft_rule_disactivate_next(net, old_rule);
1665 list_add_tail(&rule->list, &old_rule->list);
1666 } else {
1667 err = -ENOENT;
1668 goto err2;
1669 }
1594 } else if (nlh->nlmsg_flags & NLM_F_APPEND) 1670 } else if (nlh->nlmsg_flags & NLM_F_APPEND)
1595 if (old_rule) 1671 if (old_rule)
1596 list_add_rcu(&rule->list, &old_rule->list); 1672 list_add_rcu(&rule->list, &old_rule->list);
@@ -1603,11 +1679,20 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1603 list_add_rcu(&rule->list, &chain->rules); 1679 list_add_rcu(&rule->list, &chain->rules);
1604 } 1680 }
1605 1681
1606 nf_tables_rule_notify(skb, nlh, table, chain, rule, NFT_MSG_NEWRULE, 1682 if (nf_tables_trans_add(rule, &ctx) == NULL) {
1607 nlh->nlmsg_flags & (NLM_F_APPEND | NLM_F_REPLACE), 1683 err = -ENOMEM;
1608 nfmsg->nfgen_family); 1684 goto err3;
1685 }
1609 return 0; 1686 return 0;
1610 1687
1688err3:
1689 list_del_rcu(&rule->list);
1690 if (repl) {
1691 list_del_rcu(&repl->rule->list);
1692 list_del(&repl->list);
1693 nft_rule_clear(net, repl->rule);
1694 kfree(repl);
1695 }
1611err2: 1696err2:
1612 nf_tables_rule_destroy(rule); 1697 nf_tables_rule_destroy(rule);
1613err1: 1698err1:
@@ -1618,6 +1703,19 @@ err1:
1618 return err; 1703 return err;
1619} 1704}
1620 1705
1706static int
1707nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
1708{
1709 /* You cannot delete the same rule twice */
1710 if (nft_rule_is_active_next(ctx->net, rule)) {
1711 if (nf_tables_trans_add(rule, ctx) == NULL)
1712 return -ENOMEM;
1713 nft_rule_disactivate_next(ctx->net, rule);
1714 return 0;
1715 }
1716 return -ENOENT;
1717}
1718
1621static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, 1719static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
1622 const struct nlmsghdr *nlh, 1720 const struct nlmsghdr *nlh,
1623 const struct nlattr * const nla[]) 1721 const struct nlattr * const nla[])
@@ -1628,7 +1726,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
1628 const struct nft_table *table; 1726 const struct nft_table *table;
1629 struct nft_chain *chain; 1727 struct nft_chain *chain;
1630 struct nft_rule *rule, *tmp; 1728 struct nft_rule *rule, *tmp;
1631 int family = nfmsg->nfgen_family; 1729 int family = nfmsg->nfgen_family, err = 0;
1730 struct nft_ctx ctx;
1632 1731
1633 afi = nf_tables_afinfo_lookup(net, family, false); 1732 afi = nf_tables_afinfo_lookup(net, family, false);
1634 if (IS_ERR(afi)) 1733 if (IS_ERR(afi))
@@ -1642,31 +1741,95 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
1642 if (IS_ERR(chain)) 1741 if (IS_ERR(chain))
1643 return PTR_ERR(chain); 1742 return PTR_ERR(chain);
1644 1743
1744 nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1745
1645 if (nla[NFTA_RULE_HANDLE]) { 1746 if (nla[NFTA_RULE_HANDLE]) {
1646 rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); 1747 rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
1647 if (IS_ERR(rule)) 1748 if (IS_ERR(rule))
1648 return PTR_ERR(rule); 1749 return PTR_ERR(rule);
1649 1750
1650 /* List removal must be visible before destroying expressions */ 1751 err = nf_tables_delrule_one(&ctx, rule);
1651 list_del_rcu(&rule->list);
1652
1653 nf_tables_rule_notify(skb, nlh, table, chain, rule,
1654 NFT_MSG_DELRULE, 0, family);
1655 nf_tables_rule_destroy(rule);
1656 } else { 1752 } else {
1657 /* Remove all rules in this chain */ 1753 /* Remove all rules in this chain */
1658 list_for_each_entry_safe(rule, tmp, &chain->rules, list) { 1754 list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
1659 list_del_rcu(&rule->list); 1755 err = nf_tables_delrule_one(&ctx, rule);
1756 if (err < 0)
1757 break;
1758 }
1759 }
1760
1761 return err;
1762}
1763
1764static int nf_tables_commit(struct sk_buff *skb)
1765{
1766 struct net *net = sock_net(skb->sk);
1767 struct nft_rule_trans *rupd, *tmp;
1660 1768
1661 nf_tables_rule_notify(skb, nlh, table, chain, rule, 1769 /* Bump generation counter, invalidate any dump in progress */
1662 NFT_MSG_DELRULE, 0, family); 1770 net->nft.genctr++;
1663 nf_tables_rule_destroy(rule); 1771
1772 /* A new generation has just started */
1773 net->nft.gencursor = gencursor_next(net);
1774
1775 /* Make sure all packets have left the previous generation before
1776 * purging old rules.
1777 */
1778 synchronize_rcu();
1779
1780 list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
1781 /* Delete this rule from the dirty list */
1782 list_del(&rupd->list);
1783
1784 /* This rule was inactive in the past and just became active.
1785 * Clear the next bit of the genmask since its meaning has
1786 * changed, now it is the future.
1787 */
1788 if (nft_rule_is_active(net, rupd->rule)) {
1789 nft_rule_clear(net, rupd->rule);
1790 nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
1791 rupd->chain, rupd->rule,
1792 NFT_MSG_NEWRULE, 0,
1793 rupd->family);
1794 kfree(rupd);
1795 continue;
1664 } 1796 }
1797
1798 /* This rule is in the past, get rid of it */
1799 list_del_rcu(&rupd->rule->list);
1800 nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
1801 rupd->rule, NFT_MSG_DELRULE, 0,
1802 rupd->family);
1803 nf_tables_rule_destroy(rupd->rule);
1804 kfree(rupd);
1665 } 1805 }
1666 1806
1667 return 0; 1807 return 0;
1668} 1808}
1669 1809
1810static int nf_tables_abort(struct sk_buff *skb)
1811{
1812 struct net *net = sock_net(skb->sk);
1813 struct nft_rule_trans *rupd, *tmp;
1814
1815 list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
1816 /* Delete all rules from the dirty list */
1817 list_del(&rupd->list);
1818
1819 if (!nft_rule_is_active_next(net, rupd->rule)) {
1820 nft_rule_clear(net, rupd->rule);
1821 kfree(rupd);
1822 continue;
1823 }
1824
1825 /* This rule is inactive, get rid of it */
1826 list_del_rcu(&rupd->rule->list);
1827 nf_tables_rule_destroy(rupd->rule);
1828 kfree(rupd);
1829 }
1830 return 0;
1831}
1832
1670/* 1833/*
1671 * Sets 1834 * Sets
1672 */ 1835 */
@@ -2634,7 +2797,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2634 .policy = nft_chain_policy, 2797 .policy = nft_chain_policy,
2635 }, 2798 },
2636 [NFT_MSG_NEWRULE] = { 2799 [NFT_MSG_NEWRULE] = {
2637 .call = nf_tables_newrule, 2800 .call_batch = nf_tables_newrule,
2638 .attr_count = NFTA_RULE_MAX, 2801 .attr_count = NFTA_RULE_MAX,
2639 .policy = nft_rule_policy, 2802 .policy = nft_rule_policy,
2640 }, 2803 },
@@ -2644,7 +2807,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2644 .policy = nft_rule_policy, 2807 .policy = nft_rule_policy,
2645 }, 2808 },
2646 [NFT_MSG_DELRULE] = { 2809 [NFT_MSG_DELRULE] = {
2647 .call = nf_tables_delrule, 2810 .call_batch = nf_tables_delrule,
2648 .attr_count = NFTA_RULE_MAX, 2811 .attr_count = NFTA_RULE_MAX,
2649 .policy = nft_rule_policy, 2812 .policy = nft_rule_policy,
2650 }, 2813 },
@@ -2685,6 +2848,8 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
2685 .subsys_id = NFNL_SUBSYS_NFTABLES, 2848 .subsys_id = NFNL_SUBSYS_NFTABLES,
2686 .cb_count = NFT_MSG_MAX, 2849 .cb_count = NFT_MSG_MAX,
2687 .cb = nf_tables_cb, 2850 .cb = nf_tables_cb,
2851 .commit = nf_tables_commit,
2852 .abort = nf_tables_abort,
2688}; 2853};
2689 2854
2690/* 2855/*
@@ -3056,6 +3221,7 @@ EXPORT_SYMBOL_GPL(nft_data_dump);
3056static int nf_tables_init_net(struct net *net) 3221static int nf_tables_init_net(struct net *net)
3057{ 3222{
3058 INIT_LIST_HEAD(&net->nft.af_info); 3223 INIT_LIST_HEAD(&net->nft.af_info);
3224 INIT_LIST_HEAD(&net->nft.commit_list);
3059 return 0; 3225 return 0;
3060} 3226}
3061 3227
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 3c13007d80df..d581ef660248 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -88,12 +88,22 @@ nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
88 struct nft_data data[NFT_REG_MAX + 1]; 88 struct nft_data data[NFT_REG_MAX + 1];
89 unsigned int stackptr = 0; 89 unsigned int stackptr = 0;
90 struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; 90 struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
91 /*
92 * Cache cursor to avoid problems in case that the cursor is updated
93 * while traversing the ruleset.
94 */
95 unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
91 96
92do_chain: 97do_chain:
93 rule = list_entry(&chain->rules, struct nft_rule, list); 98 rule = list_entry(&chain->rules, struct nft_rule, list);
94next_rule: 99next_rule:
95 data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; 100 data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
96 list_for_each_entry_continue_rcu(rule, &chain->rules, list) { 101 list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
102
103 /* This rule is not active, skip. */
104 if (unlikely(rule->genmask & (1 << gencursor)))
105 continue;
106
97 nft_rule_for_each_expr(expr, last, rule) { 107 nft_rule_for_each_expr(expr, last, rule) {
98 if (expr->ops == &nft_cmp_fast_ops) 108 if (expr->ops == &nft_cmp_fast_ops)
99 nft_cmp_fast_eval(expr, data); 109 nft_cmp_fast_eval(expr, data);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 572d87dc116f..027f16af51a0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
147 const struct nfnetlink_subsystem *ss; 147 const struct nfnetlink_subsystem *ss;
148 int type, err; 148 int type, err;
149 149
150 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
151 return -EPERM;
152
153 /* All the messages must at least contain nfgenmsg */ 150 /* All the messages must at least contain nfgenmsg */
154 if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) 151 if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
155 return 0; 152 return 0;
@@ -217,9 +214,179 @@ replay:
217 } 214 }
218} 215}
219 216
217static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
218 u_int16_t subsys_id)
219{
220 struct sk_buff *nskb, *oskb = skb;
221 struct net *net = sock_net(skb->sk);
222 const struct nfnetlink_subsystem *ss;
223 const struct nfnl_callback *nc;
224 bool success = true, done = false;
225 int err;
226
227 if (subsys_id >= NFNL_SUBSYS_COUNT)
228 return netlink_ack(skb, nlh, -EINVAL);
229replay:
230 nskb = netlink_skb_clone(oskb, GFP_KERNEL);
231 if (!nskb)
232 return netlink_ack(oskb, nlh, -ENOMEM);
233
234 nskb->sk = oskb->sk;
235 skb = nskb;
236
237 nfnl_lock(subsys_id);
238 ss = rcu_dereference_protected(table[subsys_id].subsys,
239 lockdep_is_held(&table[subsys_id].mutex));
240 if (!ss) {
241#ifdef CONFIG_MODULES
242 nfnl_unlock(subsys_id);
243 request_module("nfnetlink-subsys-%d", subsys_id);
244 nfnl_lock(subsys_id);
245 ss = rcu_dereference_protected(table[subsys_id].subsys,
246 lockdep_is_held(&table[subsys_id].mutex));
247 if (!ss)
248#endif
249 {
250 nfnl_unlock(subsys_id);
251 kfree_skb(nskb);
252 return netlink_ack(skb, nlh, -EOPNOTSUPP);
253 }
254 }
255
256 if (!ss->commit || !ss->abort) {
257 nfnl_unlock(subsys_id);
258 kfree_skb(nskb);
259 return netlink_ack(skb, nlh, -EOPNOTSUPP);
260 }
261
262 while (skb->len >= nlmsg_total_size(0)) {
263 int msglen, type;
264
265 nlh = nlmsg_hdr(skb);
266 err = 0;
267
268 if (nlh->nlmsg_len < NLMSG_HDRLEN) {
269 err = -EINVAL;
270 goto ack;
271 }
272
273 /* Only requests are handled by the kernel */
274 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
275 err = -EINVAL;
276 goto ack;
277 }
278
279 type = nlh->nlmsg_type;
280 if (type == NFNL_MSG_BATCH_BEGIN) {
281 /* Malformed: Batch begin twice */
282 success = false;
283 goto done;
284 } else if (type == NFNL_MSG_BATCH_END) {
285 done = true;
286 goto done;
287 } else if (type < NLMSG_MIN_TYPE) {
288 err = -EINVAL;
289 goto ack;
290 }
291
292 /* We only accept a batch with messages for the same
293 * subsystem.
294 */
295 if (NFNL_SUBSYS_ID(type) != subsys_id) {
296 err = -EINVAL;
297 goto ack;
298 }
299
300 nc = nfnetlink_find_client(type, ss);
301 if (!nc) {
302 err = -EINVAL;
303 goto ack;
304 }
305
306 {
307 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
308 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
309 struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
310 struct nlattr *attr = (void *)nlh + min_len;
311 int attrlen = nlh->nlmsg_len - min_len;
312
313 err = nla_parse(cda, ss->cb[cb_id].attr_count,
314 attr, attrlen, ss->cb[cb_id].policy);
315 if (err < 0)
316 goto ack;
317
318 if (nc->call_batch) {
319 err = nc->call_batch(net->nfnl, skb, nlh,
320 (const struct nlattr **)cda);
321 }
322
323 /* The lock was released to autoload some module, we
324 * have to abort and start from scratch using the
325 * original skb.
326 */
327 if (err == -EAGAIN) {
328 ss->abort(skb);
329 nfnl_unlock(subsys_id);
330 kfree_skb(nskb);
331 goto replay;
332 }
333 }
334ack:
335 if (nlh->nlmsg_flags & NLM_F_ACK || err) {
336 /* We don't stop processing the batch on errors, thus,
337 * userspace gets all the errors that the batch
338 * triggers.
339 */
340 netlink_ack(skb, nlh, err);
341 if (err)
342 success = false;
343 }
344
345 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
346 if (msglen > skb->len)
347 msglen = skb->len;
348 skb_pull(skb, msglen);
349 }
350done:
351 if (success && done)
352 ss->commit(skb);
353 else
354 ss->abort(skb);
355
356 nfnl_unlock(subsys_id);
357 kfree_skb(nskb);
358}
359
220static void nfnetlink_rcv(struct sk_buff *skb) 360static void nfnetlink_rcv(struct sk_buff *skb)
221{ 361{
222 netlink_rcv_skb(skb, &nfnetlink_rcv_msg); 362 struct nlmsghdr *nlh = nlmsg_hdr(skb);
363 struct net *net = sock_net(skb->sk);
364 int msglen;
365
366 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
367 return netlink_ack(skb, nlh, -EPERM);
368
369 if (nlh->nlmsg_len < NLMSG_HDRLEN ||
370 skb->len < nlh->nlmsg_len)
371 return;
372
373 if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
374 struct nfgenmsg *nfgenmsg;
375
376 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
377 if (msglen > skb->len)
378 msglen = skb->len;
379
380 if (nlh->nlmsg_len < NLMSG_HDRLEN ||
381 skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
382 return;
383
384 nfgenmsg = nlmsg_data(nlh);
385 skb_pull(skb, msglen);
386 nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
387 } else {
388 netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
389 }
223} 390}
224 391
225#ifdef CONFIG_MODULES 392#ifdef CONFIG_MODULES