 drivers/isdn/hardware/eicon/divasmain.c            |    2
 drivers/isdn/sc/init.c                             |    2
 drivers/net/bonding/bond_3ad.c                     |   10
 drivers/net/bonding/bond_alb.c                     |   58
 drivers/net/bonding/bond_sysfs.c                   |   30
 drivers/net/bonding/bonding.h                      |   14
 drivers/net/ethernet/mellanox/mlx4/cmd.c           |    2
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |    2
 drivers/net/ethernet/mellanox/mlx4/fw.c            |    4
 drivers/net/ethernet/mellanox/mlx4/main.c          |   29
 drivers/net/ethernet/mellanox/mlx4/mcg.c           |    6
 drivers/net/ethernet/mellanox/mlx4/srq.c           |    1
 drivers/net/xen-netback/common.h                   |   12
 drivers/net/xen-netback/interface.c                |   16
 drivers/net/xen-netback/netback.c                  |  294
 drivers/net/xen-netback/xenbus.c                   |   52
 include/linux/mlx4/cmd.h                           |    6
 include/linux/mlx4/device.h                        |    2
 include/linux/netfilter.h                          |   14
 include/linux/netfilter/nfnetlink.h                |    5
 include/net/inet_timewait_sock.h                   |    2
 include/net/irda/irda_device.h                     |    2
 include/net/net_namespace.h                        |    4
 include/net/netfilter/nf_nat.h                     |    3
 include/net/netfilter/nf_tables.h                  |  522
 include/net/netfilter/nf_tables_core.h             |   42
 include/net/netfilter/nf_tables_ipv4.h             |   23
 include/net/netfilter/nf_tables_ipv6.h             |   30
 include/net/netns/nftables.h                       |   19
 include/net/route.h                                |    8
 include/uapi/linux/netfilter/Kbuild                |    2
 include/uapi/linux/netfilter/nf_conntrack_common.h |    4
 include/uapi/linux/netfilter/nf_tables.h           |  718
 include/uapi/linux/netfilter/nf_tables_compat.h    |   38
 include/uapi/linux/netfilter/nfnetlink.h           |   10
 include/xen/interface/io/netif.h                   |   18
 net/batman-adv/Makefile                            |    2
 net/batman-adv/bridge_loop_avoidance.c             |   14
 net/batman-adv/distributed-arp-table.c             |   11
 net/batman-adv/fragmentation.c                     |  491
 net/batman-adv/fragmentation.h                     |   50
 net/batman-adv/gateway_client.c                    |   23
 net/batman-adv/hard-interface.c                    |   22
 net/batman-adv/icmp_socket.c                       |   22
 net/batman-adv/main.c                              |   37
 net/batman-adv/main.h                              |   10
 net/batman-adv/originator.c                        |   19
 net/batman-adv/packet.h                            |   79
 net/batman-adv/routing.c                           |  185
 net/batman-adv/routing.h                           |    4
 net/batman-adv/send.c                              |  186
 net/batman-adv/send.h                              |   40
 net/batman-adv/soft-interface.c                    |   34
 net/batman-adv/translation-table.c                 |   63
 net/batman-adv/translation-table.h                 |    7
 net/batman-adv/types.h                             |   70
 net/batman-adv/unicast.c                           |  491
 net/batman-adv/unicast.h                           |   92
 net/bridge/br_netfilter.c                          |   22
 net/bridge/netfilter/Kconfig                       |    3
 net/bridge/netfilter/Makefile                      |    2
 net/bridge/netfilter/ebtable_filter.c              |   16
 net/bridge/netfilter/ebtable_nat.c                 |   16
 net/bridge/netfilter/nf_tables_bridge.c            |   65
 net/decnet/netfilter/dn_rtmsg.c                    |    2
 net/ipv4/inet_diag.c                               |    9
 net/ipv4/netfilter/Kconfig                         |   21
 net/ipv4/netfilter/Makefile                        |    6
 net/ipv4/netfilter/arptable_filter.c               |    5
 net/ipv4/netfilter/ipt_CLUSTERIP.c                 |    2
 net/ipv4/netfilter/ipt_SYNPROXY.c                  |    2
 net/ipv4/netfilter/iptable_filter.c                |    7
 net/ipv4/netfilter/iptable_mangle.c                |   10
 net/ipv4/netfilter/iptable_nat.c                   |   26
 net/ipv4/netfilter/iptable_raw.c                   |    6
 net/ipv4/netfilter/iptable_security.c              |    7
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c     |   12
 net/ipv4/netfilter/nf_defrag_ipv4.c                |    6
 net/ipv4/netfilter/nf_tables_arp.c                 |  102
 net/ipv4/netfilter/nf_tables_ipv4.c                |  128
 net/ipv4/netfilter/nft_chain_nat_ipv4.c            |  205
 net/ipv4/netfilter/nft_chain_route_ipv4.c          |   90
 net/ipv4/netfilter/nft_reject_ipv4.c               |  123
 net/ipv4/route.c                                   |   16
 net/ipv6/netfilter/Kconfig                         |   13
 net/ipv6/netfilter/Makefile                        |    5
 net/ipv6/netfilter/ip6t_SYNPROXY.c                 |    2
 net/ipv6/netfilter/ip6table_filter.c               |    5
 net/ipv6/netfilter/ip6table_mangle.c               |   10
 net/ipv6/netfilter/ip6table_nat.c                  |   27
 net/ipv6/netfilter/ip6table_raw.c                  |    5
 net/ipv6/netfilter/ip6table_security.c             |    5
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c     |   14
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c          |    6
 net/ipv6/netfilter/nf_tables_ipv6.c                |  127
 net/ipv6/netfilter/nft_chain_nat_ipv6.c            |  211
 net/ipv6/netfilter/nft_chain_route_ipv6.c          |   88
 net/netfilter/Kconfig                              |   52
 net/netfilter/Makefile                             |   18
 net/netfilter/core.c                               |    2
 net/netfilter/ipvs/ip_vs_core.c                    |   42
 net/netfilter/nf_nat_core.c                        |   20
 net/netfilter/nf_tables_api.c                      | 3275
 net/netfilter/nf_tables_core.c                     |  270
 net/netfilter/nfnetlink.c                          |  175
 net/netfilter/nft_bitwise.c                        |  146
 net/netfilter/nft_byteorder.c                      |  173
 net/netfilter/nft_cmp.c                            |  223
 net/netfilter/nft_compat.c                         |  768
 net/netfilter/nft_counter.c                        |  113
 net/netfilter/nft_ct.c                             |  258
 net/netfilter/nft_expr_template.c                  |   94
 net/netfilter/nft_exthdr.c                         |  133
 net/netfilter/nft_hash.c                           |  231
 net/netfilter/nft_immediate.c                      |  132
 net/netfilter/nft_limit.c                          |  119
 net/netfilter/nft_log.c                            |  146
 net/netfilter/nft_lookup.c                         |  141
 net/netfilter/nft_meta.c                           |  228
 net/netfilter/nft_meta_target.c                    |  117
 net/netfilter/nft_nat.c                            |  220
 net/netfilter/nft_payload.c                        |  160
 net/netfilter/nft_rbtree.c                         |  247
 security/selinux/hooks.c                           |   10
 124 files changed, 11781 insertions(+), 1117 deletions(-)
diff --git a/drivers/isdn/hardware/eicon/divasmain.c b/drivers/isdn/hardware/eicon/divasmain.c
index 52377b4bf039..a2e0ed6c9a4d 100644
--- a/drivers/isdn/hardware/eicon/divasmain.c
+++ b/drivers/isdn/hardware/eicon/divasmain.c
@@ -481,7 +481,7 @@ void __inline__ outpp(void __iomem *addr, word p)
 int diva_os_register_irq(void *context, byte irq, const char *name)
 {
 	int result = request_irq(irq, diva_os_irq_wrapper,
-				 IRQF_DISABLED | IRQF_SHARED, name, context);
+				 IRQF_SHARED, name, context);
 	return (result);
 }
 
diff --git a/drivers/isdn/sc/init.c b/drivers/isdn/sc/init.c
index ca997bd4e818..92acc81f844d 100644
--- a/drivers/isdn/sc/init.c
+++ b/drivers/isdn/sc/init.c
@@ -336,7 +336,7 @@ static int __init sc_init(void)
 	 */
 	sc_adapter[cinst]->interrupt = irq[b];
 	if (request_irq(sc_adapter[cinst]->interrupt, interrupt_handler,
-			IRQF_DISABLED, interface->id,
+			0, interface->id,
 			(void *)(unsigned long) cinst))
 	{
 		kfree(sc_adapter[cinst]->channel);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index ea3e64e22e22..187b1b7772ef 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2344,7 +2344,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
 	struct slave *slave;
 	struct port *port;
 
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		port = &(SLAVE_AD_INFO(slave).port);
 		if (port->aggregator && port->aggregator->is_active) {
 			aggregator = port->aggregator;
@@ -2369,9 +2369,9 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
 {
 	int ret;
 
-	read_lock(&bond->lock);
+	rcu_read_lock();
 	ret = __bond_3ad_get_active_agg_info(bond, ad_info);
-	read_unlock(&bond->lock);
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -2388,7 +2388,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 	int res = 1;
 	int agg_id;
 
-	read_lock(&bond->lock);
 	if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
 		pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",
 			 dev->name);
@@ -2406,7 +2405,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 	slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
 	first_ok_slave = NULL;
 
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		agg = SLAVE_AD_INFO(slave).port.aggregator;
 		if (!agg || agg->aggregator_identifier != agg_id)
 			continue;
@@ -2436,7 +2435,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 	res = bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
 
 out:
-	read_unlock(&bond->lock);
 	if (res) {
 		/* no suitable interface, frame not sent */
 		kfree_skb(skb);
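
[Editorial aside] The bond_3ad.c hunks above move the aggregator lookup and xmit paths from bond->lock to RCU. Below is a minimal, self-contained sketch of the read-side pattern they switch to; the demo_* names are illustrative and not part of the bonding driver.

#include <linux/rcupdate.h>
#include <linux/rculist.h>

struct demo_slave {
	struct list_head list;	/* linked with list_add_rcu() by the writer */
	bool up;
};

/* Count usable slaves without taking a sleeping lock: readers only need
 * rcu_read_lock(), and the writer frees retired entries only after
 * synchronize_rcu() has let readers like this one finish.
 */
static int demo_count_up_slaves(struct list_head *slaves)
{
	struct demo_slave *slave;
	int n = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(slave, slaves, list)
		if (slave->up)
			n++;
	rcu_read_unlock();

	return n;
}
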
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 576cceae026a..02872405d35d 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -230,7 +230,7 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
 	max_gap = LLONG_MIN;
 
 	/* Find the slave with the largest gap */
-	bond_for_each_slave(bond, slave, iter) {
+	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (SLAVE_IS_OK(slave)) {
 			long long gap = compute_gap(slave);
 
@@ -412,6 +412,39 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond)
 	return rx_slave;
 }
 
+/* Caller must hold rcu_read_lock() for read */
+static struct slave *__rlb_next_rx_slave(struct bonding *bond)
+{
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	struct slave *before = NULL, *rx_slave = NULL, *slave;
+	struct list_head *iter;
+	bool found = false;
+
+	bond_for_each_slave_rcu(bond, slave, iter) {
+		if (!SLAVE_IS_OK(slave))
+			continue;
+		if (!found) {
+			if (!before || before->speed < slave->speed)
+				before = slave;
+		} else {
+			if (!rx_slave || rx_slave->speed < slave->speed)
+				rx_slave = slave;
+		}
+		if (slave == bond_info->rx_slave)
+			found = true;
+	}
+	/* we didn't find anything after the current or we have something
+	 * better before and up to the current slave
+	 */
+	if (!rx_slave || (before && rx_slave->speed < before->speed))
+		rx_slave = before;
+
+	if (rx_slave)
+		bond_info->rx_slave = rx_slave;
+
+	return rx_slave;
+}
+
 /* teach the switch the mac of a disabled slave
  * on the primary for fault tolerance
  *
@@ -628,12 +661,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 {
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 	struct arp_pkt *arp = arp_pkt(skb);
-	struct slave *assigned_slave;
+	struct slave *assigned_slave, *curr_active_slave;
 	struct rlb_client_info *client_info;
 	u32 hash_index = 0;
 
 	_lock_rx_hashtbl(bond);
 
+	curr_active_slave = rcu_dereference(bond->curr_active_slave);
+
 	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
 	client_info = &(bond_info->rx_hashtbl[hash_index]);
 
@@ -658,14 +693,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
 		 * that the new client can be assigned to this entry.
 		 */
 		if (bond->curr_active_slave &&
-		    client_info->slave != bond->curr_active_slave) {
-			client_info->slave = bond->curr_active_slave;
+		    client_info->slave != curr_active_slave) {
+			client_info->slave = curr_active_slave;
 			rlb_update_client(client_info);
 		}
 	}
 	/* assign a new slave */
-	assigned_slave = rlb_next_rx_slave(bond);
+	assigned_slave = __rlb_next_rx_slave(bond);
 
 	if (assigned_slave) {
 		if (!(client_info->assigned &&
@@ -728,7 +763,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 	/* Don't modify or load balance ARPs that do not originate locally
 	 * (e.g.,arrive via a bridge).
 	 */
-	if (!bond_slave_has_mac(bond, arp->mac_src))
+	if (!bond_slave_has_mac_rcu(bond, arp->mac_src))
 		return NULL;
 
 	if (arp->op_code == htons(ARPOP_REPLY)) {
@@ -1343,11 +1378,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 	skb_reset_mac_header(skb);
 	eth_data = eth_hdr(skb);
 
-	/* make sure that the curr_active_slave do not change during tx
-	 */
-	read_lock(&bond->lock);
-	read_lock(&bond->curr_slave_lock);
-
 	switch (ntohs(skb->protocol)) {
 	case ETH_P_IP: {
 		const struct iphdr *iph = ip_hdr(skb);
@@ -1429,12 +1459,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 
 	if (!tx_slave) {
 		/* unbalanced or unassigned, send through primary */
-		tx_slave = bond->curr_active_slave;
+		tx_slave = rcu_dereference(bond->curr_active_slave);
 		bond_info->unbalanced_load += skb->len;
 	}
 
 	if (tx_slave && SLAVE_IS_OK(tx_slave)) {
-		if (tx_slave != bond->curr_active_slave) {
+		if (tx_slave != rcu_dereference(bond->curr_active_slave)) {
 			memcpy(eth_data->h_source,
 			       tx_slave->dev->dev_addr,
 			       ETH_ALEN);
@@ -1449,8 +1479,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 		}
 	}
 
-	read_unlock(&bond->curr_slave_lock);
-	read_unlock(&bond->lock);
 	if (res) {
 		/* no suitable interface, frame not sent */
 		kfree_skb(skb);
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index e9249527e7e7..03bed0ca935e 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -179,7 +179,9 @@ static ssize_t bonding_show_slaves(struct device *d,
 	struct slave *slave;
 	int res = 0;
 
-	read_lock(&bond->lock);
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	bond_for_each_slave(bond, slave, iter) {
 		if (res > (PAGE_SIZE - IFNAMSIZ)) {
 			/* not enough space for another interface name */
@@ -190,7 +192,9 @@ static ssize_t bonding_show_slaves(struct device *d,
 		}
 		res += sprintf(buf + res, "%s ", slave->dev->name);
 	}
-	read_unlock(&bond->lock);
+
+	rtnl_unlock();
+
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
 
@@ -626,6 +630,9 @@ static ssize_t bonding_store_arp_targets(struct device *d,
 	unsigned long *targets_rx;
 	int ind, i, j, ret = -EINVAL;
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	targets = bond->params.arp_targets;
 	newtarget = in_aton(buf + 1);
 	/* look for adds */
@@ -699,6 +706,7 @@ static ssize_t bonding_store_arp_targets(struct device *d,
 
 	ret = count;
 out:
+	rtnl_unlock();
 	return ret;
 }
 static DEVICE_ATTR(arp_ip_target, S_IRUGO | S_IWUSR , bonding_show_arp_targets, bonding_store_arp_targets);
@@ -1467,7 +1475,6 @@ static ssize_t bonding_show_queue_id(struct device *d,
 	if (!rtnl_trylock())
 		return restart_syscall();
 
-	read_lock(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
 		if (res > (PAGE_SIZE - IFNAMSIZ - 6)) {
 			/* not enough space for another interface_name:queue_id pair */
@@ -1479,9 +1486,9 @@ static ssize_t bonding_show_queue_id(struct device *d,
 		res += sprintf(buf + res, "%s:%d ",
 			       slave->dev->name, slave->queue_id);
 	}
-	read_unlock(&bond->lock);
 	if (res)
 		buf[res-1] = '\n'; /* eat the leftover space */
+
 	rtnl_unlock();
 
 	return res;
@@ -1530,8 +1537,6 @@ static ssize_t bonding_store_queue_id(struct device *d,
 	if (!sdev)
 		goto err_no_cmd;
 
-	read_lock(&bond->lock);
-
 	/* Search for thes slave and check for duplicate qids */
 	update_slave = NULL;
 	bond_for_each_slave(bond, slave, iter) {
@@ -1542,23 +1547,20 @@ static ssize_t bonding_store_queue_id(struct device *d,
 		 */
 			update_slave = slave;
 		else if (qid && qid == slave->queue_id) {
-			goto err_no_cmd_unlock;
+			goto err_no_cmd;
 		}
 	}
 
 	if (!update_slave)
-		goto err_no_cmd_unlock;
+		goto err_no_cmd;
 
 	/* Actually set the qids for the slave */
 	update_slave->queue_id = qid;
 
-	read_unlock(&bond->lock);
 out:
 	rtnl_unlock();
 	return ret;
 
-err_no_cmd_unlock:
-	read_unlock(&bond->lock);
 err_no_cmd:
 	pr_info("invalid input for queue_id set for %s.\n",
 		bond->dev->name);
@@ -1591,6 +1593,9 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 	struct list_head *iter;
 	struct slave *slave;
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
 	if (sscanf(buf, "%d", &new_value) != 1) {
 		pr_err("%s: no all_slaves_active value specified.\n",
 		       bond->dev->name);
@@ -1610,7 +1615,6 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 		goto out;
 	}
 
-	read_lock(&bond->lock);
 	bond_for_each_slave(bond, slave, iter) {
 		if (!bond_is_active_slave(slave)) {
 			if (new_value)
@@ -1619,8 +1623,8 @@ static ssize_t bonding_store_slaves_active(struct device *d,
 				slave->inactive = 1;
 		}
 	}
-	read_unlock(&bond->lock);
 out:
+	rtnl_unlock();
 	return ret;
 }
 static DEVICE_ATTR(all_slaves_active, S_IRUGO | S_IWUSR,
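
[Editorial aside] The bond_sysfs.c hunks replace bond->lock with the RTNL in the sysfs handlers, using the rtnl_trylock()/restart_syscall() idiom so a handler never blocks while holding the sysfs active reference. A minimal sketch of that handler shape follows; the demo_* names are illustrative rather than the driver's own.

#include <linux/rtnetlink.h>
#include <linux/device.h>
#include <linux/sched.h>

static ssize_t demo_show_something(struct device *d,
				   struct device_attribute *attr, char *buf)
{
	ssize_t res;

	/* If RTNL is contended, bounce the syscall and let it retry from
	 * scratch instead of sleeping here.
	 */
	if (!rtnl_trylock())
		return restart_syscall();

	/* ... walk RTNL-protected state and format it into buf ... */
	res = scnprintf(buf, PAGE_SIZE, "example\n");

	rtnl_unlock();
	return res;
}
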
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 0bd04fbda8e9..bb5c731e2560 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -464,6 +464,20 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
 	return NULL;
 }
 
+/* Caller must hold rcu_read_lock() for read */
+static inline struct slave *bond_slave_has_mac_rcu(struct bonding *bond,
+						   const u8 *mac)
+{
+	struct list_head *iter;
+	struct slave *tmp;
+
+	bond_for_each_slave_rcu(bond, tmp, iter)
+		if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
+			return tmp;
+
+	return NULL;
+}
+
 /* Check if the ip is present in arp ip list, or first free slot if ip == 0
  * Returns -1 if not found, index if found
  */
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index ea20182c6969..735765c21c95 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2253,7 +2253,6 @@ EXPORT_SYMBOL_GPL(mlx4_set_vf_mac);
 int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	struct mlx4_vport_oper_state *vf_oper;
 	struct mlx4_vport_state *vf_admin;
 	int slave;
 
@@ -2269,7 +2268,6 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
 		return -EINVAL;
 
 	vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
-	vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 
 	if ((0 == vlan) && (0 == qos))
 		vf_admin->default_vlan = MLX4_VGT;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index fa37b7a61213..85d91665d400 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1733,7 +1733,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 
 	/* Unregister Mac address for the port */
 	mlx4_en_put_qp(priv);
-	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN))
+	if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN))
 		mdev->mac_removed[priv->port] = 1;
 
 	/* Free RX Rings */
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 0d63daa2f422..c151e7a6710a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -652,7 +652,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		 QUERY_DEV_CAP_RSVD_LKEY_OFFSET);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC);
 	if (field & 1<<6)
-		dev_cap->flags2 |= MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN;
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN;
 	MLX4_GET(dev_cap->max_icm_sz, outbox,
 		 QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET);
 	if (dev_cap->flags & MLX4_DEV_CAP_FLAG_COUNTERS)
@@ -1713,7 +1713,6 @@ void mlx4_opreq_action(struct work_struct *work)
 	u32 *outbox;
 	u32 modifier;
 	u16 token;
-	u16 type_m;
 	u16 type;
 	int err;
 	u32 num_qps;
@@ -1746,7 +1745,6 @@ void mlx4_opreq_action(struct work_struct *work)
 	MLX4_GET(modifier, outbox, GET_OP_REQ_MODIFIER_OFFSET);
 	MLX4_GET(token, outbox, GET_OP_REQ_TOKEN_OFFSET);
 	MLX4_GET(type, outbox, GET_OP_REQ_TYPE_OFFSET);
-	type_m = type >> 12;
 	type &= 0xfff;
 
 	switch (type) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 60c9f4f103fc..179d26709c94 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -42,6 +42,7 @@
 #include <linux/io-mapping.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
+#include <linux/kmod.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
@@ -650,6 +651,27 @@ err_mem:
 	return err;
 }
 
+static void mlx4_request_modules(struct mlx4_dev *dev)
+{
+	int port;
+	int has_ib_port = false;
+	int has_eth_port = false;
+#define EN_DRV_NAME "mlx4_en"
+#define IB_DRV_NAME "mlx4_ib"
+
+	for (port = 1; port <= dev->caps.num_ports; port++) {
+		if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
+			has_ib_port = true;
+		else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
+			has_eth_port = true;
+	}
+
+	if (has_ib_port)
+		request_module_nowait(IB_DRV_NAME);
+	if (has_eth_port)
+		request_module_nowait(EN_DRV_NAME);
+}
+
 /*
  * Change the port configuration of the device.
  * Every user of this function must hold the port mutex.
@@ -681,6 +703,11 @@ int mlx4_change_port_types(struct mlx4_dev *dev,
 	}
 	mlx4_set_port_mask(dev);
 	err = mlx4_register_device(dev);
+	if (err) {
+		mlx4_err(dev, "Failed to register device\n");
+		goto out;
+	}
+	mlx4_request_modules(dev);
 	}
 
 out:
@@ -2305,6 +2332,8 @@ slave_start:
 	if (err)
 		goto err_port;
 
+	mlx4_request_modules(dev);
+
 	mlx4_sense_init(dev);
 	mlx4_start_sense(dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 55f6245efb6c..70f0213d68c4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -645,7 +645,7 @@ static const u8 __promisc_mode[] = {
 int mlx4_map_sw_to_hw_steering_mode(struct mlx4_dev *dev,
 				    enum mlx4_net_trans_promisc_mode flow_type)
 {
-	if (flow_type >= MLX4_FS_MODE_NUM || flow_type < 0) {
+	if (flow_type >= MLX4_FS_MODE_NUM) {
 		mlx4_err(dev, "Invalid flow type. type = %d\n", flow_type);
 		return -EINVAL;
 	}
@@ -681,7 +681,7 @@ const u16 __sw_id_hw[] = {
 int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev,
 				  enum mlx4_net_trans_rule_id id)
 {
-	if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) {
+	if (id >= MLX4_NET_TRANS_RULE_NUM) {
 		mlx4_err(dev, "Invalid network rule id. id = %d\n", id);
 		return -EINVAL;
 	}
@@ -706,7 +706,7 @@ static const int __rule_hw_sz[] = {
 int mlx4_hw_rule_sz(struct mlx4_dev *dev,
 	       enum mlx4_net_trans_rule_id id)
 {
-	if (id >= MLX4_NET_TRANS_RULE_NUM || id < 0) {
+	if (id >= MLX4_NET_TRANS_RULE_NUM) {
 		mlx4_err(dev, "Invalid network rule id. id = %d\n", id);
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 79fd269e2c54..9e08e35ce351 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -34,6 +34,7 @@
 #include <linux/init.h>
 
 #include <linux/mlx4/cmd.h>
+#include <linux/mlx4/srq.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
 
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5715318d6bab..55b8dec86233 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -87,9 +87,13 @@ struct pending_tx_info {
 struct xenvif_rx_meta {
 	int id;
 	int size;
+	int gso_type;
 	int gso_size;
 };
 
+#define GSO_BIT(type) \
+	(1 << XEN_NETIF_GSO_TYPE_ ## type)
+
 /* Discriminate from any valid pending_idx value. */
 #define INVALID_PENDING_IDX 0xFFFF
 
@@ -150,10 +154,12 @@ struct xenvif {
 	u8 fe_dev_addr[6];
 
 	/* Frontend feature information. */
+	int gso_mask;
+	int gso_prefix_mask;
+
 	u8 can_sg:1;
-	u8 gso:1;
-	u8 gso_prefix:1;
-	u8 csum:1;
+	u8 ip_csum:1;
+	u8 ipv6_csum:1;
 
 	/* Internal feature information. */
 	u8 can_queue:1; /* can queue packets for receiver? */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 01bb854c7f62..e4aa26748f80 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -214,10 +214,14 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev,
 
 	if (!vif->can_sg)
 		features &= ~NETIF_F_SG;
-	if (!vif->gso && !vif->gso_prefix)
+	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))
 		features &= ~NETIF_F_TSO;
-	if (!vif->csum)
+	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6))
+		features &= ~NETIF_F_TSO6;
+	if (!vif->ip_csum)
 		features &= ~NETIF_F_IP_CSUM;
+	if (!vif->ipv6_csum)
+		features &= ~NETIF_F_IPV6_CSUM;
 
 	return features;
 }
@@ -306,7 +310,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->domid = domid;
 	vif->handle = handle;
 	vif->can_sg = 1;
-	vif->csum = 1;
+	vif->ip_csum = 1;
 	vif->dev = dev;
 
 	vif->credit_bytes = vif->remaining_credit = ~0UL;
@@ -316,8 +320,10 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->credit_timeout.expires = jiffies;
 
 	dev->netdev_ops = &xenvif_netdev_ops;
-	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
-	dev->features = dev->hw_features;
+	dev->hw_features = NETIF_F_SG |
+		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+		NETIF_F_TSO | NETIF_F_TSO6;
+	dev->features = dev->hw_features | NETIF_F_RXCSUM;
 	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
 
 	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index f3e591c611de..828fdab4f1a4 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -109,15 +109,12 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
 	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
 }
 
-/*
- * This is the amount of packet we copy rather than map, so that the
- * guest can't fiddle with the contents of the headers while we do
- * packet processing on them (netfilter, routing, etc).
+/* This is a miniumum size for the linear area to avoid lots of
+ * calls to __pskb_pull_tail() as we set up checksum offsets. The
+ * value 128 was chosen as it covers all IPv4 and most likely
+ * IPv6 headers.
  */
-#define PKT_PROT_LEN (ETH_HLEN + \
-		      VLAN_HLEN + \
-		      sizeof(struct iphdr) + MAX_IPOPTLEN + \
-		      sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
+#define PKT_PROT_LEN 128
 
 static u16 frag_get_pending_idx(skb_frag_t *frag)
 {
@@ -145,7 +142,7 @@ static int max_required_rx_slots(struct xenvif *vif)
 	int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE);
 
 	/* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */
-	if (vif->can_sg || vif->gso || vif->gso_prefix)
+	if (vif->can_sg || vif->gso_mask || vif->gso_prefix_mask)
 		max += MAX_SKB_FRAGS + 1; /* extra_info + frags */
 
 	return max;
@@ -317,6 +314,7 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
 	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 
 	meta = npo->meta + npo->meta_prod++;
+	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
 	meta->gso_size = 0;
 	meta->size = 0;
 	meta->id = req->id;
@@ -339,6 +337,7 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 	struct gnttab_copy *copy_gop;
 	struct xenvif_rx_meta *meta;
 	unsigned long bytes;
+	int gso_type;
 
 	/* Data must not cross a page boundary. */
 	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
@@ -397,7 +396,14 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 	}
 
 	/* Leave a gap for the GSO descriptor. */
-	if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+	else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+	else
+		gso_type = XEN_NETIF_GSO_TYPE_NONE;
+
+	if (*head && ((1 << gso_type) & vif->gso_mask))
 		vif->rx.req_cons++;
 
 	*head = 0; /* There must be something in this buffer now. */
@@ -428,14 +434,28 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	unsigned char *data;
 	int head = 1;
 	int old_meta_prod;
+	int gso_type;
+	int gso_size;
 
 	old_meta_prod = npo->meta_prod;
 
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+		gso_size = skb_shinfo(skb)->gso_size;
+	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+		gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+		gso_size = skb_shinfo(skb)->gso_size;
+	} else {
+		gso_type = XEN_NETIF_GSO_TYPE_NONE;
+		gso_size = 0;
+	}
+
 	/* Set up a GSO prefix descriptor, if necessary */
-	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
+	if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) {
 		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 		meta = npo->meta + npo->meta_prod++;
-		meta->gso_size = skb_shinfo(skb)->gso_size;
+		meta->gso_type = gso_type;
+		meta->gso_size = gso_size;
 		meta->size = 0;
 		meta->id = req->id;
 	}
@@ -443,10 +463,13 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
 	meta = npo->meta + npo->meta_prod++;
 
-	if (!vif->gso_prefix)
-		meta->gso_size = skb_shinfo(skb)->gso_size;
-	else
+	if ((1 << gso_type) & vif->gso_mask) {
+		meta->gso_type = gso_type;
+		meta->gso_size = gso_size;
+	} else {
+		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
 		meta->gso_size = 0;
+	}
 
 	meta->size = 0;
 	meta->id = req->id;
@@ -592,7 +615,8 @@ void xenvif_rx_action(struct xenvif *vif)
 
 		vif = netdev_priv(skb->dev);
 
-		if (vif->meta[npo.meta_cons].gso_size && vif->gso_prefix) {
+		if ((1 << vif->meta[npo.meta_cons].gso_type) &
+		    vif->gso_prefix_mask) {
 			resp = RING_GET_RESPONSE(&vif->rx,
 						 vif->rx.rsp_prod_pvt++);
 
@@ -629,7 +653,8 @@ void xenvif_rx_action(struct xenvif *vif)
 					vif->meta[npo.meta_cons].size,
 					flags);
 
-		if (vif->meta[npo.meta_cons].gso_size && !vif->gso_prefix) {
+		if ((1 << vif->meta[npo.meta_cons].gso_type) &
+		    vif->gso_mask) {
 			struct xen_netif_extra_info *gso =
 				(struct xen_netif_extra_info *)
 				RING_GET_RESPONSE(&vif->rx,
@@ -637,8 +662,8 @@ void xenvif_rx_action(struct xenvif *vif)
 
 			resp->flags |= XEN_NETRXF_extra_info;
 
+			gso->u.gso.type = vif->meta[npo.meta_cons].gso_type;
 			gso->u.gso.size = vif->meta[npo.meta_cons].gso_size;
-			gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
 			gso->u.gso.pad = 0;
 			gso->u.gso.features = 0;
 
@@ -1101,15 +1126,20 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 		return -EINVAL;
 	}
 
-	/* Currently only TCPv4 S.O. is supported. */
-	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+	switch (gso->u.gso.type) {
+	case XEN_NETIF_GSO_TYPE_TCPV4:
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+		break;
+	case XEN_NETIF_GSO_TYPE_TCPV6:
+		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+		break;
+	default:
 		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
 		xenvif_fatal_tx_err(vif);
 		return -EINVAL;
 	}
 
 	skb_shinfo(skb)->gso_size = gso->u.gso.size;
-	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
 	/* Header must be checked, and gso_segs computed. */
 	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
@@ -1118,61 +1148,74 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
 	return 0;
 }
 
-static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+static inline void maybe_pull_tail(struct sk_buff *skb, unsigned int len)
+{
+	if (skb_is_nonlinear(skb) && skb_headlen(skb) < len) {
+		/* If we need to pullup then pullup to the max, so we
+		 * won't need to do it again.
+		 */
+		int target = min_t(int, skb->len, MAX_TCP_HEADER);
+		__pskb_pull_tail(skb, target - skb_headlen(skb));
+	}
+}
+
+static int checksum_setup_ip(struct xenvif *vif, struct sk_buff *skb,
+			     int recalculate_partial_csum)
 {
-	struct iphdr *iph;
+	struct iphdr *iph = (void *)skb->data;
+	unsigned int header_size;
+	unsigned int off;
 	int err = -EPROTO;
-	int recalculate_partial_csum = 0;
 
-	/*
-	 * A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
-	 * peers can fail to set NETRXF_csum_blank when sending a GSO
-	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
-	 * recalculate the partial checksum.
-	 */
-	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
-		vif->rx_gso_checksum_fixup++;
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		recalculate_partial_csum = 1;
-	}
+	off = sizeof(struct iphdr);
 
-	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
-	if (skb->ip_summed != CHECKSUM_PARTIAL)
-		return 0;
+	header_size = skb->network_header + off + MAX_IPOPTLEN;
+	maybe_pull_tail(skb, header_size);
 
-	if (skb->protocol != htons(ETH_P_IP))
-		goto out;
+	off = iph->ihl * 4;
 
-	iph = (void *)skb->data;
 	switch (iph->protocol) {
 	case IPPROTO_TCP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct tcphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
 			tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
							 IPPROTO_TCP, 0);
 		}
 		break;
 	case IPPROTO_UDP:
-		if (!skb_partial_csum_set(skb, 4 * iph->ihl,
+		if (!skb_partial_csum_set(skb, off,
 					  offsetof(struct udphdr, check)))
 			goto out;
 
 		if (recalculate_partial_csum) {
 			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
 			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 skb->len - iph->ihl*4,
+							 skb->len - off,
							 IPPROTO_UDP, 0);
 		}
 		break;
 	default:
 		if (net_ratelimit())
 			netdev_err(vif->dev,
-				   "Attempting to checksum a non-TCP/UDP packet, dropping a protocol %d packet\n",
+				   "Attempting to checksum a non-TCP/UDP packet, "
+				   "dropping a protocol %d packet\n",
 				   iph->protocol);
 		goto out;
 	}
@@ -1183,6 +1226,158 @@ out:
 	return err;
 }
 
+static int checksum_setup_ipv6(struct xenvif *vif, struct sk_buff *skb,
+			       int recalculate_partial_csum)
+{
+	int err = -EPROTO;
+	struct ipv6hdr *ipv6h = (void *)skb->data;
+	u8 nexthdr;
+	unsigned int header_size;
+	unsigned int off;
+	bool fragment;
+	bool done;
+
+	done = false;
+
+	off = sizeof(struct ipv6hdr);
+
+	header_size = skb->network_header + off;
+	maybe_pull_tail(skb, header_size);
+
+	nexthdr = ipv6h->nexthdr;
+
+	while ((off <= sizeof(struct ipv6hdr) + ntohs(ipv6h->payload_len)) &&
+	       !done) {
+		switch (nexthdr) {
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING: {
+			struct ipv6_opt_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ipv6_opt_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += ipv6_optlen(hp);
+			break;
+		}
+		case IPPROTO_AH: {
+			struct ip_auth_hdr *hp = (void *)(skb->data + off);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct ip_auth_hdr);
+			maybe_pull_tail(skb, header_size);
+
+			nexthdr = hp->nexthdr;
+			off += (hp->hdrlen+2)<<2;
+			break;
+		}
+		case IPPROTO_FRAGMENT:
+			fragment = true;
+			/* fall through */
+		default:
+			done = true;
+			break;
+		}
+	}
+
+	if (!done) {
+		if (net_ratelimit())
+			netdev_err(vif->dev, "Failed to parse packet header\n");
+		goto out;
+	}
+
+	if (fragment) {
+		if (net_ratelimit())
+			netdev_err(vif->dev, "Packet is a fragment!\n");
+		goto out;
+	}
+
+	switch (nexthdr) {
+	case IPPROTO_TCP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct tcphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct tcphdr *tcph = tcp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct tcphdr);
+			maybe_pull_tail(skb, header_size);
+
+			tcph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_TCP, 0);
+		}
+		break;
+	case IPPROTO_UDP:
+		if (!skb_partial_csum_set(skb, off,
+					  offsetof(struct udphdr, check)))
+			goto out;
+
+		if (recalculate_partial_csum) {
+			struct udphdr *udph = udp_hdr(skb);
+
+			header_size = skb->network_header +
+				off +
+				sizeof(struct udphdr);
+			maybe_pull_tail(skb, header_size);
+
+			udph->check = ~csum_ipv6_magic(&ipv6h->saddr,
+						       &ipv6h->daddr,
+						       skb->len - off,
+						       IPPROTO_UDP, 0);
+		}
+		break;
+	default:
+		if (net_ratelimit())
+			netdev_err(vif->dev,
+				   "Attempting to checksum a non-TCP/UDP packet, "
+				   "dropping a protocol %d packet\n",
+				   nexthdr);
+		goto out;
+	}
+
+	err = 0;
+
+out:
+	return err;
+}
+
+static int checksum_setup(struct xenvif *vif, struct sk_buff *skb)
+{
+	int err = -EPROTO;
+	int recalculate_partial_csum = 0;
+
+	/* A GSO SKB must be CHECKSUM_PARTIAL. However some buggy
+	 * peers can fail to set NETRXF_csum_blank when sending a GSO
+	 * frame. In this case force the SKB to CHECKSUM_PARTIAL and
+	 * recalculate the partial checksum.
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL && skb_is_gso(skb)) {
+		vif->rx_gso_checksum_fixup++;
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		recalculate_partial_csum = 1;
+	}
+
+	/* A non-CHECKSUM_PARTIAL SKB does not require setup. */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		err = checksum_setup_ip(vif, skb, recalculate_partial_csum);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+		err = checksum_setup_ipv6(vif, skb, recalculate_partial_csum);
+
+	return err;
+}
+
 static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 {
 	unsigned long now = jiffies;
@@ -1428,12 +1623,7 @@ static int xenvif_tx_submit(struct xenvif *vif, int budget)
 
 		xenvif_fill_frags(vif, skb);
 
-		/*
-		 * If the initial fragment was < PKT_PROT_LEN then
-		 * pull through some bytes from the other fragments to
-		 * increase the linear region to PKT_PROT_LEN bytes.
-		 */
-		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
 			int target = min_t(int, skb->len, PKT_PROT_LEN);
 			__pskb_pull_tail(skb, target - skb_headlen(skb));
 		}
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 1b08d8798372..f0358992b04f 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -105,6 +105,22 @@ static int netback_probe(struct xenbus_device *dev,
 		goto abort_transaction;
 	}
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
+			    "%d", sg);
+	if (err) {
+		message = "writing feature-gso-tcpv6";
+		goto abort_transaction;
+	}
+
+	/* We support partial checksum setup for IPv6 packets */
+	err = xenbus_printf(xbt, dev->nodename,
+			    "feature-ipv6-csum-offload",
+			    "%d", 1);
+	if (err) {
+		message = "writing feature-ipv6-csum-offload";
+		goto abort_transaction;
+	}
+
 	/* We support rx-copy path. */
 	err = xenbus_printf(xbt, dev->nodename,
 			    "feature-rx-copy", "%d", 1);
@@ -561,20 +577,50 @@ static int connect_rings(struct backend_info *be)
 		val = 0;
 	vif->can_sg = !!val;
 
+	vif->gso_mask = 0;
+	vif->gso_prefix_mask = 0;
+
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
 			 "%d", &val) < 0)
 		val = 0;
-	vif->gso = !!val;
+	if (val)
+		vif->gso_mask |= GSO_BIT(TCPV4);
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
 			 "%d", &val) < 0)
 		val = 0;
-	vif->gso_prefix = !!val;
+	if (val)
+		vif->gso_prefix_mask |= GSO_BIT(TCPV4);
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6",
+			 "%d", &val) < 0)
+		val = 0;
+	if (val)
+		vif->gso_mask |= GSO_BIT(TCPV6);
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-prefix",
+			 "%d", &val) < 0)
+		val = 0;
+	if (val)
+		vif->gso_prefix_mask |= GSO_BIT(TCPV6);
+
+	if (vif->gso_mask & vif->gso_prefix_mask) {
+		xenbus_dev_fatal(dev, err,
+				 "%s: gso and gso prefix flags are not "
+				 "mutually exclusive",
+				 dev->otherend);
+		return -EOPNOTSUPP;
+	}
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
 			 "%d", &val) < 0)
 		val = 0;
-	vif->csum = !val;
+	vif->ip_csum = !val;
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-ipv6-csum-offload",
+			 "%d", &val) < 0)
+		val = 0;
+	vif->ipv6_csum = !!val;
 
 	/* Map the shared frame, irq etc. */
 	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref,
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index cd1fdf75103b..8df61bc5da00 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -154,10 +154,6 @@ enum {
 	MLX4_CMD_QUERY_IF_STAT = 0X54,
 	MLX4_CMD_SET_IF_STAT = 0X55,
 
-	/* set port opcode modifiers */
-	MLX4_SET_PORT_PRIO2TC = 0x8,
-	MLX4_SET_PORT_SCHEDULER = 0x9,
-
 	/* register/delete flow steering network rules */
 	MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
 	MLX4_QP_FLOW_STEERING_DETACH = 0x66,
@@ -182,6 +178,8 @@ enum {
 	MLX4_SET_PORT_VLAN_TABLE = 0x3,
 	MLX4_SET_PORT_PRIO_MAP = 0x4,
 	MLX4_SET_PORT_GID_TABLE = 0x5,
+	MLX4_SET_PORT_PRIO2TC = 0x8,
+	MLX4_SET_PORT_SCHEDULER = 0x9,
 };
 
 enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 24ce6bdd540e..9ad0c18495ad 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -155,7 +155,7 @@ enum {
 	MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1,
 	MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2,
 	MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3,
-	MLX4_DEV_CAP_FLAGS2_REASSIGN_MAC_EN = 1LL << 4,
+	MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN = 1LL << 4,
 	MLX4_DEV_CAP_FLAG2_TS = 1LL << 5,
 	MLX4_DEV_CAP_FLAG2_VLAN_CONTROL = 1LL << 6,
 	MLX4_DEV_CAP_FLAG2_FSM = 1LL << 7,
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 61223c52414f..2077489f9887 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -42,7 +42,8 @@ int netfilter_init(void);
 
 struct sk_buff;
 
-typedef unsigned int nf_hookfn(unsigned int hooknum,
+struct nf_hook_ops;
+typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops,
 			       struct sk_buff *skb,
 			       const struct net_device *in,
 			       const struct net_device *out,
@@ -52,12 +53,13 @@ struct nf_hook_ops {
 	struct list_head list;
 
 	/* User fills in from here down. */
 	nf_hookfn *hook;
 	struct module *owner;
-	u_int8_t pf;
-	unsigned int hooknum;
+	void *priv;
+	u_int8_t pf;
+	unsigned int hooknum;
 	/* Hooks are ordered in ascending priority. */
 	int priority;
 };
 
 struct nf_sockopt_ops {
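
[Editorial aside] The include/linux/netfilter.h hunks change the hook prototype: a hook now receives the struct nf_hook_ops it was registered with (which also gains a priv pointer) instead of a bare hook number. A minimal sketch of a hook written against the new prototype; the demo_* names are illustrative:

static unsigned int demo_hook(const struct nf_hook_ops *ops,
			      struct sk_buff *skb,
			      const struct net_device *in,
			      const struct net_device *out,
			      int (*okfn)(struct sk_buff *))
{
	/* the hook number is now taken from the registering ops */
	pr_debug("packet seen on hook %u\n", ops->hooknum);
	return NF_ACCEPT;
}

static struct nf_hook_ops demo_ops = {
	.hook		= demo_hook,
	.owner		= THIS_MODULE,
	.priv		= NULL,		/* new field introduced above */
	.pf		= NFPROTO_IPV4,
	.hooknum	= NF_INET_LOCAL_IN,
	.priority	= NF_IP_PRI_FILTER,
};
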
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 4f68cd7141d2..28c74367e900 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -14,6 +14,9 @@ struct nfnl_callback {
14 int (*call_rcu)(struct sock *nl, struct sk_buff *skb, 14 int (*call_rcu)(struct sock *nl, struct sk_buff *skb,
15 const struct nlmsghdr *nlh, 15 const struct nlmsghdr *nlh,
16 const struct nlattr * const cda[]); 16 const struct nlattr * const cda[]);
17 int (*call_batch)(struct sock *nl, struct sk_buff *skb,
18 const struct nlmsghdr *nlh,
19 const struct nlattr * const cda[]);
17 const struct nla_policy *policy; /* netlink attribute policy */ 20 const struct nla_policy *policy; /* netlink attribute policy */
18 const u_int16_t attr_count; /* number of nlattr's */ 21 const u_int16_t attr_count; /* number of nlattr's */
19}; 22};
@@ -23,6 +26,8 @@ struct nfnetlink_subsystem {
23 __u8 subsys_id; /* nfnetlink subsystem ID */ 26 __u8 subsys_id; /* nfnetlink subsystem ID */
24 __u8 cb_count; /* number of callbacks */ 27 __u8 cb_count; /* number of callbacks */
25 const struct nfnl_callback *cb; /* callback for individual types */ 28 const struct nfnl_callback *cb; /* callback for individual types */
29 int (*commit)(struct sk_buff *skb);
30 int (*abort)(struct sk_buff *skb);
26}; 31};
27 32
28int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n); 33int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n);
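The new call_batch, commit and abort callbacks are the hooks for batched netlink processing: messages framed by NFNL_MSG_BATCH_BEGIN/NFNL_MSG_BATCH_END (added to the uapi header further down) are dispatched through call_batch, and the subsystem's commit or abort runs once at the end of the batch. A rough sketch of a subsystem wiring these up, using only the fields shown above; all names are illustrative:

#include <linux/netfilter/nfnetlink.h>

static int example_commit(struct sk_buff *skb)
{
	/* make all changes queued during the batch visible */
	return 0;
}

static int example_abort(struct sk_buff *skb)
{
	/* drop everything queued since NFNL_MSG_BATCH_BEGIN */
	return 0;
}

/* per-type handlers; a real subsystem fills in .call/.call_batch here */
static const struct nfnl_callback example_cb[1];

static const struct nfnetlink_subsystem example_subsys = {
	.subsys_id = NFNL_SUBSYS_NFTABLES,
	.cb_count  = 1,
	.cb        = example_cb,
	.commit    = example_commit,
	.abort     = example_abort,
};

A subsystem registered this way (nfnetlink_subsys_register(&example_subsys)) gets transactional semantics for free: either the whole batch is committed or it is rolled back.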
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index b647c6270eb7..71c6e264e5b5 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -135,7 +135,7 @@ struct inet_timewait_sock {
135 tw_transparent : 1, 135 tw_transparent : 1,
136 tw_pad : 6, /* 6 bits hole */ 136 tw_pad : 6, /* 6 bits hole */
137 tw_tos : 8, 137 tw_tos : 8,
138 tw_pad2 : 16 /* 16 bits hole */ 138 tw_pad2 : 16; /* 16 bits hole */
139 kmemcheck_bitfield_end(flags); 139 kmemcheck_bitfield_end(flags);
140 u32 tw_ttd; 140 u32 tw_ttd;
141 struct inet_bind_bucket *tw_tb; 141 struct inet_bind_bucket *tw_tb;
diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h
index 94c852d47d0f..11417475a6c3 100644
--- a/include/net/irda/irda_device.h
+++ b/include/net/irda/irda_device.h
@@ -162,7 +162,7 @@ typedef struct {
162 int irq, irq2; /* Interrupts used */ 162 int irq, irq2; /* Interrupts used */
163 int dma, dma2; /* DMA channel(s) used */ 163 int dma, dma2; /* DMA channel(s) used */
164 int fifo_size; /* FIFO size */ 164 int fifo_size; /* FIFO size */
165 int irqflags; /* interrupt flags (ie, IRQF_SHARED|IRQF_DISABLED) */ 165 int irqflags; /* interrupt flags (ie, IRQF_SHARED) */
166 int direction; /* Link direction, used by some FIR drivers */ 166 int direction; /* Link direction, used by some FIR drivers */
167 int enabled; /* Powered on? */ 167 int enabled; /* Powered on? */
168 int suspended; /* Suspended by APM */ 168 int suspended; /* Suspended by APM */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index bcc4a8ed4450..da68c9a90ac5 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -22,6 +22,7 @@
22#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 22#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
23#include <net/netns/conntrack.h> 23#include <net/netns/conntrack.h>
24#endif 24#endif
25#include <net/netns/nftables.h>
25#include <net/netns/xfrm.h> 26#include <net/netns/xfrm.h>
26 27
27struct user_namespace; 28struct user_namespace;
@@ -101,6 +102,9 @@ struct net {
101#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 102#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
102 struct netns_ct ct; 103 struct netns_ct ct;
103#endif 104#endif
105#if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE)
106 struct netns_nftables nft;
107#endif
104#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 108#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
105 struct netns_nf_frag nf_frag; 109 struct netns_nf_frag nf_frag;
106#endif 110#endif
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index c29b4e545f87..07eaaf604092 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -45,6 +45,9 @@ unsigned int nf_nat_setup_info(struct nf_conn *ct,
45 const struct nf_nat_range *range, 45 const struct nf_nat_range *range,
46 enum nf_nat_manip_type maniptype); 46 enum nf_nat_manip_type maniptype);
47 47
48extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct,
49 unsigned int hooknum);
50
48/* Is this tuple already taken? (not by us)*/ 51/* Is this tuple already taken? (not by us)*/
49int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, 52int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
50 const struct nf_conn *ignored_conntrack); 53 const struct nf_conn *ignored_conntrack);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
new file mode 100644
index 000000000000..54c4a5cafb64
--- /dev/null
+++ b/include/net/netfilter/nf_tables.h
@@ -0,0 +1,522 @@
1#ifndef _NET_NF_TABLES_H
2#define _NET_NF_TABLES_H
3
4#include <linux/list.h>
5#include <linux/netfilter.h>
6#include <linux/netfilter/x_tables.h>
7#include <linux/netfilter/nf_tables.h>
8#include <net/netlink.h>
9
10#define NFT_JUMP_STACK_SIZE 16
11
12struct nft_pktinfo {
13 struct sk_buff *skb;
14 const struct net_device *in;
15 const struct net_device *out;
16 u8 hooknum;
17 u8 nhoff;
18 u8 thoff;
19 /* for x_tables compatibility */
20 struct xt_action_param xt;
21};
22
23static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
24 const struct nf_hook_ops *ops,
25 struct sk_buff *skb,
26 const struct net_device *in,
27 const struct net_device *out)
28{
29 pkt->skb = skb;
30 pkt->in = pkt->xt.in = in;
31 pkt->out = pkt->xt.out = out;
32 pkt->hooknum = pkt->xt.hooknum = ops->hooknum;
33 pkt->xt.family = ops->pf;
34}
35
36struct nft_data {
37 union {
38 u32 data[4];
39 struct {
40 u32 verdict;
41 struct nft_chain *chain;
42 };
43 };
44} __attribute__((aligned(__alignof__(u64))));
45
46static inline int nft_data_cmp(const struct nft_data *d1,
47 const struct nft_data *d2,
48 unsigned int len)
49{
50 return memcmp(d1->data, d2->data, len);
51}
52
53static inline void nft_data_copy(struct nft_data *dst,
54 const struct nft_data *src)
55{
56 BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64));
57 *(u64 *)&dst->data[0] = *(u64 *)&src->data[0];
58 *(u64 *)&dst->data[2] = *(u64 *)&src->data[2];
59}
60
61static inline void nft_data_debug(const struct nft_data *data)
62{
63 pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n",
64 data->data[0], data->data[1],
65 data->data[2], data->data[3]);
66}
67
68/**
69 * struct nft_ctx - nf_tables rule/set context
70 *
71 * @net: net namespace
72 * @skb: netlink skb
73 * @nlh: netlink message header
74 * @afi: address family info
75 * @table: the table the chain is contained in
76 * @chain: the chain the rule is contained in
77 * @nla: netlink attributes
78 */
79struct nft_ctx {
80 struct net *net;
81 const struct sk_buff *skb;
82 const struct nlmsghdr *nlh;
83 const struct nft_af_info *afi;
84 const struct nft_table *table;
85 const struct nft_chain *chain;
86 const struct nlattr * const *nla;
87};
88
89struct nft_data_desc {
90 enum nft_data_types type;
91 unsigned int len;
92};
93
94extern int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
95 struct nft_data_desc *desc, const struct nlattr *nla);
96extern void nft_data_uninit(const struct nft_data *data,
97 enum nft_data_types type);
98extern int nft_data_dump(struct sk_buff *skb, int attr,
99 const struct nft_data *data,
100 enum nft_data_types type, unsigned int len);
101
102static inline enum nft_data_types nft_dreg_to_type(enum nft_registers reg)
103{
104 return reg == NFT_REG_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE;
105}
106
107static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
108{
109 return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1;
110}
111
112extern int nft_validate_input_register(enum nft_registers reg);
113extern int nft_validate_output_register(enum nft_registers reg);
114extern int nft_validate_data_load(const struct nft_ctx *ctx,
115 enum nft_registers reg,
116 const struct nft_data *data,
117 enum nft_data_types type);
118
119/**
120 * struct nft_set_elem - generic representation of set elements
121 *
122 * @cookie: implementation specific element cookie
123 * @key: element key
124 * @data: element data (maps only)
125 * @flags: element flags (end of interval)
126 *
127 * The cookie can be used to store a handle to the element for subsequent
128 * removal.
129 */
130struct nft_set_elem {
131 void *cookie;
132 struct nft_data key;
133 struct nft_data data;
134 u32 flags;
135};
136
137struct nft_set;
138struct nft_set_iter {
139 unsigned int count;
140 unsigned int skip;
141 int err;
142 int (*fn)(const struct nft_ctx *ctx,
143 const struct nft_set *set,
144 const struct nft_set_iter *iter,
145 const struct nft_set_elem *elem);
146};
147
148/**
149 * struct nft_set_ops - nf_tables set operations
150 *
151 * @lookup: look up an element within the set
152 * @insert: insert new element into set
153 * @remove: remove element from set
154 * @walk: iterate over all set elements
155 * @privsize: function to return size of set private data
156 * @init: initialize private data of new set instance
157 * @destroy: destroy private data of set instance
158 * @list: nf_tables_set_ops list node
159 * @owner: module reference
160 * @features: features supported by the implementation
161 */
162struct nft_set_ops {
163 bool (*lookup)(const struct nft_set *set,
164 const struct nft_data *key,
165 struct nft_data *data);
166 int (*get)(const struct nft_set *set,
167 struct nft_set_elem *elem);
168 int (*insert)(const struct nft_set *set,
169 const struct nft_set_elem *elem);
170 void (*remove)(const struct nft_set *set,
171 const struct nft_set_elem *elem);
172 void (*walk)(const struct nft_ctx *ctx,
173 const struct nft_set *set,
174 struct nft_set_iter *iter);
175
176 unsigned int (*privsize)(const struct nlattr * const nla[]);
177 int (*init)(const struct nft_set *set,
178 const struct nlattr * const nla[]);
179 void (*destroy)(const struct nft_set *set);
180
181 struct list_head list;
182 struct module *owner;
183 u32 features;
184};
185
186extern int nft_register_set(struct nft_set_ops *ops);
187extern void nft_unregister_set(struct nft_set_ops *ops);
188
189/**
190 * struct nft_set - nf_tables set instance
191 *
192 * @list: table set list node
193 * @bindings: list of set bindings
194 * @name: name of the set
195 * @ktype: key type (numeric type defined by userspace, not used in the kernel)
196 * @dtype: data type (verdict or numeric type defined by userspace)
197 * @ops: set ops
198 * @flags: set flags
199 * @klen: key length
200 * @dlen: data length
201 * @data: private set data
202 */
203struct nft_set {
204 struct list_head list;
205 struct list_head bindings;
206 char name[IFNAMSIZ];
207 u32 ktype;
208 u32 dtype;
209 /* runtime data below here */
210 const struct nft_set_ops *ops ____cacheline_aligned;
211 u16 flags;
212 u8 klen;
213 u8 dlen;
214 unsigned char data[]
215 __attribute__((aligned(__alignof__(u64))));
216};
217
218static inline void *nft_set_priv(const struct nft_set *set)
219{
220 return (void *)set->data;
221}
222
223extern struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
224 const struct nlattr *nla);
225
226/**
227 * struct nft_set_binding - nf_tables set binding
228 *
229 * @list: set bindings list node
230 * @chain: chain containing the rule bound to the set
231 *
232 * A set binding contains all information necessary for validation
233 * of new elements added to a bound set.
234 */
235struct nft_set_binding {
236 struct list_head list;
237 const struct nft_chain *chain;
238};
239
240extern int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
241 struct nft_set_binding *binding);
242extern void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
243 struct nft_set_binding *binding);
244
245
246/**
247 * struct nft_expr_type - nf_tables expression type
248 *
249 * @select_ops: function to select nft_expr_ops
250 * @ops: default ops, used when no select_ops function is present
251 * @list: used internally
252 * @name: Identifier
253 * @owner: module reference
254 * @policy: netlink attribute policy
255 * @maxattr: highest netlink attribute number
256 */
257struct nft_expr_type {
258 const struct nft_expr_ops *(*select_ops)(const struct nft_ctx *,
259 const struct nlattr * const tb[]);
260 const struct nft_expr_ops *ops;
261 struct list_head list;
262 const char *name;
263 struct module *owner;
264 const struct nla_policy *policy;
265 unsigned int maxattr;
266};
267
268/**
269 * struct nft_expr_ops - nf_tables expression operations
270 *
271 * @eval: Expression evaluation function
272 * @size: full expression size, including private data size
273 * @init: initialization function
274 * @destroy: destruction function
275 * @dump: function to dump parameters
276 * @type: expression type
277 * @validate: validate expression, called during loop detection
278 * @data: extra data to attach to this expression operation
279 */
280struct nft_expr;
281struct nft_expr_ops {
282 void (*eval)(const struct nft_expr *expr,
283 struct nft_data data[NFT_REG_MAX + 1],
284 const struct nft_pktinfo *pkt);
285 unsigned int size;
286
287 int (*init)(const struct nft_ctx *ctx,
288 const struct nft_expr *expr,
289 const struct nlattr * const tb[]);
290 void (*destroy)(const struct nft_expr *expr);
291 int (*dump)(struct sk_buff *skb,
292 const struct nft_expr *expr);
293 int (*validate)(const struct nft_ctx *ctx,
294 const struct nft_expr *expr,
295 const struct nft_data **data);
296 const struct nft_expr_type *type;
297 void *data;
298};
299
300#define NFT_EXPR_MAXATTR 16
301#define NFT_EXPR_SIZE(size) (sizeof(struct nft_expr) + \
302 ALIGN(size, __alignof__(struct nft_expr)))
303
304/**
305 * struct nft_expr - nf_tables expression
306 *
307 * @ops: expression ops
308 * @data: expression private data
309 */
310struct nft_expr {
311 const struct nft_expr_ops *ops;
312 unsigned char data[];
313};
314
315static inline void *nft_expr_priv(const struct nft_expr *expr)
316{
317 return (void *)expr->data;
318}
319
320/**
321 * struct nft_rule - nf_tables rule
322 *
323 * @list: used internally
324 * @rcu_head: used internally for rcu
325 * @handle: rule handle
326 * @genmask: generation mask
327 * @dlen: length of expression data
328 * @data: expression data
329 */
330struct nft_rule {
331 struct list_head list;
332 struct rcu_head rcu_head;
333 u64 handle:46,
334 genmask:2,
335 dlen:16;
336 unsigned char data[]
337 __attribute__((aligned(__alignof__(struct nft_expr))));
338};
339
340/**
341 * struct nft_rule_trans - nf_tables rule update in transaction
342 *
343 * @list: used internally
344 * @rule: rule that needs to be updated
345 * @chain: chain that this rule belongs to
346 * @table: table for which this chain applies
347 * @nlh: netlink header of the message that contain this update
348 * @family: family expressed as AF_*
349 */
350struct nft_rule_trans {
351 struct list_head list;
352 struct nft_rule *rule;
353 const struct nft_chain *chain;
354 const struct nft_table *table;
355 const struct nlmsghdr *nlh;
356 u8 family;
357};
358
359static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
360{
361 return (struct nft_expr *)&rule->data[0];
362}
363
364static inline struct nft_expr *nft_expr_next(const struct nft_expr *expr)
365{
366 return ((void *)expr) + expr->ops->size;
367}
368
369static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
370{
371 return (struct nft_expr *)&rule->data[rule->dlen];
372}
373
374/*
375 * The last pointer isn't really necessary, but the compiler isn't able to
376 * determine that the result of nft_expr_last() is always the same since it
377 * can't assume that the dlen value wasn't changed within calls in the loop.
378 */
379#define nft_rule_for_each_expr(expr, last, rule) \
380 for ((expr) = nft_expr_first(rule), (last) = nft_expr_last(rule); \
381 (expr) != (last); \
382 (expr) = nft_expr_next(expr))
383
384enum nft_chain_flags {
385 NFT_BASE_CHAIN = 0x1,
386};
387
388/**
389 * struct nft_chain - nf_tables chain
390 *
391 * @rules: list of rules in the chain
392 * @list: used internally
393 * @rcu_head: used internally
394 * @net: net namespace that this chain belongs to
395 * @table: table that this chain belongs to
396 * @handle: chain handle
397 * @flags: bitmask of enum nft_chain_flags
398 * @use: number of jump references to this chain
399 * @level: length of longest path to this chain
400 * @name: name of the chain
401 */
402struct nft_chain {
403 struct list_head rules;
404 struct list_head list;
405 struct rcu_head rcu_head;
406 struct net *net;
407 struct nft_table *table;
408 u64 handle;
409 u8 flags;
410 u16 use;
411 u16 level;
412 char name[NFT_CHAIN_MAXNAMELEN];
413};
414
415enum nft_chain_type {
416 NFT_CHAIN_T_DEFAULT = 0,
417 NFT_CHAIN_T_ROUTE,
418 NFT_CHAIN_T_NAT,
419 NFT_CHAIN_T_MAX
420};
421
422struct nft_stats {
423 u64 bytes;
424 u64 pkts;
425};
426
427/**
428 * struct nft_base_chain - nf_tables base chain
429 *
430 * @ops: netfilter hook ops
431 * @type: chain type
432 * @policy: default policy
433 * @stats: per-cpu chain stats
434 * @chain: the chain
435 */
436struct nft_base_chain {
437 struct nf_hook_ops ops;
438 enum nft_chain_type type;
439 u8 policy;
440 struct nft_stats __percpu *stats;
441 struct nft_chain chain;
442};
443
444static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
445{
446 return container_of(chain, struct nft_base_chain, chain);
447}
448
449extern unsigned int nft_do_chain_pktinfo(struct nft_pktinfo *pkt,
450 const struct nf_hook_ops *ops);
451
452/**
453 * struct nft_table - nf_tables table
454 *
455 * @list: used internally
456 * @chains: chains in the table
457 * @sets: sets in the table
458 * @hgenerator: handle generator state
459 * @use: number of chain references to this table
460 * @flags: table flag (see enum nft_table_flags)
461 * @name: name of the table
462 */
463struct nft_table {
464 struct list_head list;
465 struct list_head chains;
466 struct list_head sets;
467 u64 hgenerator;
468 u32 use;
469 u16 flags;
470 char name[];
471};
472
473/**
474 * struct nft_af_info - nf_tables address family info
475 *
476 * @list: used internally
477 * @family: address family
478 * @nhooks: number of hooks in this family
479 * @owner: module owner
480 * @tables: used internally
481 * @hooks: hookfn overrides for packet validation
482 */
483struct nft_af_info {
484 struct list_head list;
485 int family;
486 unsigned int nhooks;
487 struct module *owner;
488 struct list_head tables;
489 nf_hookfn *hooks[NF_MAX_HOOKS];
490};
491
492extern int nft_register_afinfo(struct net *, struct nft_af_info *);
493extern void nft_unregister_afinfo(struct nft_af_info *);
494
495struct nf_chain_type {
496 unsigned int hook_mask;
497 const char *name;
498 enum nft_chain_type type;
499 nf_hookfn *fn[NF_MAX_HOOKS];
500 struct module *me;
501 int family;
502};
503
504extern int nft_register_chain_type(struct nf_chain_type *);
505extern void nft_unregister_chain_type(struct nf_chain_type *);
506
507extern int nft_register_expr(struct nft_expr_type *);
508extern void nft_unregister_expr(struct nft_expr_type *);
509
510#define MODULE_ALIAS_NFT_FAMILY(family) \
511 MODULE_ALIAS("nft-afinfo-" __stringify(family))
512
513#define MODULE_ALIAS_NFT_CHAIN(family, name) \
514 MODULE_ALIAS("nft-chain-" __stringify(family) "-" name)
515
516#define MODULE_ALIAS_NFT_EXPR(name) \
517 MODULE_ALIAS("nft-expr-" name)
518
519#define MODULE_ALIAS_NFT_SET() \
520 MODULE_ALIAS("nft-set")
521
522#endif /* _NET_NF_TABLES_H */
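The rule layout above is what the core evaluation loop walks: each rule carries a variable-length array of expressions, nft_rule_for_each_expr() iterates them, and every expression's eval() callback reads and writes the shared register file. A condensed sketch of that loop, written against the declarations in this header only; the real loop in the nf_tables core additionally handles verdicts, jumps (NFT_JUMP_STACK_SIZE) and chain counters:

#include <net/netfilter/nf_tables.h>

static void example_eval_rule(const struct nft_rule *rule,
			      const struct nft_pktinfo *pkt)
{
	struct nft_data data[NFT_REG_MAX + 1];
	const struct nft_expr *expr, *last;

	data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;

	nft_rule_for_each_expr(expr, last, rule) {
		expr->ops->eval(expr, data, pkt);

		/* an expression may end evaluation of this rule, e.g. a
		 * failed cmp stores NFT_BREAK in the verdict register
		 */
		if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
			break;
	}
}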
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
new file mode 100644
index 000000000000..fe7b16206a4e
--- /dev/null
+++ b/include/net/netfilter/nf_tables_core.h
@@ -0,0 +1,42 @@
1#ifndef _NET_NF_TABLES_CORE_H
2#define _NET_NF_TABLES_CORE_H
3
4extern int nf_tables_core_module_init(void);
5extern void nf_tables_core_module_exit(void);
6
7extern int nft_immediate_module_init(void);
8extern void nft_immediate_module_exit(void);
9
10struct nft_cmp_fast_expr {
11 u32 data;
12 enum nft_registers sreg:8;
13 u8 len;
14};
15
16extern const struct nft_expr_ops nft_cmp_fast_ops;
17
18extern int nft_cmp_module_init(void);
19extern void nft_cmp_module_exit(void);
20
21extern int nft_lookup_module_init(void);
22extern void nft_lookup_module_exit(void);
23
24extern int nft_bitwise_module_init(void);
25extern void nft_bitwise_module_exit(void);
26
27extern int nft_byteorder_module_init(void);
28extern void nft_byteorder_module_exit(void);
29
30struct nft_payload {
31 enum nft_payload_bases base:8;
32 u8 offset;
33 u8 len;
34 enum nft_registers dreg:8;
35};
36
37extern const struct nft_expr_ops nft_payload_fast_ops;
38
39extern int nft_payload_module_init(void);
40extern void nft_payload_module_exit(void);
41
42#endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
new file mode 100644
index 000000000000..1be1c2c197ee
--- /dev/null
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -0,0 +1,23 @@
1#ifndef _NF_TABLES_IPV4_H_
2#define _NF_TABLES_IPV4_H_
3
4#include <net/netfilter/nf_tables.h>
5#include <net/ip.h>
6
7static inline void
8nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
9 const struct nf_hook_ops *ops,
10 struct sk_buff *skb,
11 const struct net_device *in,
12 const struct net_device *out)
13{
14 struct iphdr *ip;
15
16 nft_set_pktinfo(pkt, ops, skb, in, out);
17
18 pkt->xt.thoff = ip_hdrlen(pkt->skb);
19 ip = ip_hdr(pkt->skb);
20 pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
21}
22
23#endif
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
new file mode 100644
index 000000000000..4a9b88a65963
--- /dev/null
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -0,0 +1,30 @@
1#ifndef _NF_TABLES_IPV6_H_
2#define _NF_TABLES_IPV6_H_
3
4#include <linux/netfilter_ipv6/ip6_tables.h>
5#include <net/ipv6.h>
6
7static inline int
8nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
9 const struct nf_hook_ops *ops,
10 struct sk_buff *skb,
11 const struct net_device *in,
12 const struct net_device *out)
13{
14 int protohdr, thoff = 0;
15 unsigned short frag_off;
16
17 nft_set_pktinfo(pkt, ops, skb, in, out);
18
19 protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
20 /* If malformed, drop it */
21 if (protohdr < 0)
22 return -1;
23
24 pkt->xt.thoff = thoff;
25 pkt->xt.fragoff = frag_off;
26
27 return 0;
28}
29
30#endif
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
new file mode 100644
index 000000000000..15d056d534e3
--- /dev/null
+++ b/include/net/netns/nftables.h
@@ -0,0 +1,19 @@
1#ifndef _NETNS_NFTABLES_H_
2#define _NETNS_NFTABLES_H_
3
4#include <linux/list.h>
5
6struct nft_af_info;
7
8struct netns_nftables {
9 struct list_head af_info;
10 struct list_head commit_list;
11 struct nft_af_info *ipv4;
12 struct nft_af_info *ipv6;
13 struct nft_af_info *arp;
14 struct nft_af_info *bridge;
15 u8 gencursor;
16 u8 genctr;
17};
18
19#endif
diff --git a/include/net/route.h b/include/net/route.h
index 0ad8e0102386..dd4ae0029fd8 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -88,22 +88,14 @@ struct ip_rt_acct {
88}; 88};
89 89
90struct rt_cache_stat { 90struct rt_cache_stat {
91 unsigned int in_hit;
92 unsigned int in_slow_tot; 91 unsigned int in_slow_tot;
93 unsigned int in_slow_mc; 92 unsigned int in_slow_mc;
94 unsigned int in_no_route; 93 unsigned int in_no_route;
95 unsigned int in_brd; 94 unsigned int in_brd;
96 unsigned int in_martian_dst; 95 unsigned int in_martian_dst;
97 unsigned int in_martian_src; 96 unsigned int in_martian_src;
98 unsigned int out_hit;
99 unsigned int out_slow_tot; 97 unsigned int out_slow_tot;
100 unsigned int out_slow_mc; 98 unsigned int out_slow_mc;
101 unsigned int gc_total;
102 unsigned int gc_ignored;
103 unsigned int gc_goal_miss;
104 unsigned int gc_dst_overflow;
105 unsigned int in_hlist_search;
106 unsigned int out_hlist_search;
107}; 99};
108 100
109extern struct ip_rt_acct __percpu *ip_rt_acct; 101extern struct ip_rt_acct __percpu *ip_rt_acct;
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
index 174915420d3f..17c3af2c4bb9 100644
--- a/include/uapi/linux/netfilter/Kbuild
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -5,6 +5,8 @@ header-y += nf_conntrack_ftp.h
5header-y += nf_conntrack_sctp.h 5header-y += nf_conntrack_sctp.h
6header-y += nf_conntrack_tcp.h 6header-y += nf_conntrack_tcp.h
7header-y += nf_conntrack_tuple_common.h 7header-y += nf_conntrack_tuple_common.h
8header-y += nf_tables.h
9header-y += nf_tables_compat.h
8header-y += nf_nat.h 10header-y += nf_nat.h
9header-y += nfnetlink.h 11header-y += nfnetlink.h
10header-y += nfnetlink_acct.h 12header-y += nfnetlink_acct.h
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index 8dd803818ebe..319f47128db8 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -25,6 +25,10 @@ enum ip_conntrack_info {
25 IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 25 IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
26}; 26};
27 27
28#define NF_CT_STATE_INVALID_BIT (1 << 0)
29#define NF_CT_STATE_BIT(ctinfo) (1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
30#define NF_CT_STATE_UNTRACKED_BIT (1 << (IP_CT_NUMBER + 1))
31
28/* Bitset representing status of connection. */ 32/* Bitset representing status of connection. */
29enum ip_conntrack_status { 33enum ip_conntrack_status {
30 /* It's an expected connection: bit 0 set. This bit never changed */ 34 /* It's an expected connection: bit 0 set. This bit never changed */
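The NF_CT_STATE_* macros translate an ip_conntrack_info value into the state bitmask consumed by the new nft ct expression: bit 0 is reserved for invalid packets and the modulo folds the reply-direction enum values onto the same bits as the original direction. A small compile-time illustration, assuming the long-standing enum values from this header (IP_CT_ESTABLISHED = 0, IP_CT_IS_REPLY = 3):

#include <linux/bug.h>
#include <linux/netfilter/nf_conntrack_common.h>

static inline void example_ct_state_bits(void)
{
	/* both directions of an established connection share one bit:
	 * 1 << (0 % 3 + 1) == 1 << (3 % 3 + 1)
	 */
	BUILD_BUG_ON(NF_CT_STATE_BIT(IP_CT_ESTABLISHED) !=
		     NF_CT_STATE_BIT(IP_CT_ESTABLISHED_REPLY));

	/* bit 0 stays free for NF_CT_STATE_INVALID_BIT */
	BUILD_BUG_ON(NF_CT_STATE_BIT(IP_CT_ESTABLISHED) !=
		     NF_CT_STATE_INVALID_BIT << 1);
}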
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
new file mode 100644
index 000000000000..fbfd229a8e99
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -0,0 +1,718 @@
1#ifndef _LINUX_NF_TABLES_H
2#define _LINUX_NF_TABLES_H
3
4#define NFT_CHAIN_MAXNAMELEN 32
5
6enum nft_registers {
7 NFT_REG_VERDICT,
8 NFT_REG_1,
9 NFT_REG_2,
10 NFT_REG_3,
11 NFT_REG_4,
12 __NFT_REG_MAX
13};
14#define NFT_REG_MAX (__NFT_REG_MAX - 1)
15
16/**
17 * enum nft_verdicts - nf_tables internal verdicts
18 *
19 * @NFT_CONTINUE: continue evaluation of the current rule
20 * @NFT_BREAK: terminate evaluation of the current rule
21 * @NFT_JUMP: push the current chain on the jump stack and jump to a chain
22 * @NFT_GOTO: jump to a chain without pushing the current chain on the jump stack
23 * @NFT_RETURN: return to the topmost chain on the jump stack
24 *
25 * The nf_tables verdicts share their numeric space with the netfilter verdicts.
26 */
27enum nft_verdicts {
28 NFT_CONTINUE = -1,
29 NFT_BREAK = -2,
30 NFT_JUMP = -3,
31 NFT_GOTO = -4,
32 NFT_RETURN = -5,
33};
34
35/**
36 * enum nf_tables_msg_types - nf_tables netlink message types
37 *
38 * @NFT_MSG_NEWTABLE: create a new table (enum nft_table_attributes)
39 * @NFT_MSG_GETTABLE: get a table (enum nft_table_attributes)
40 * @NFT_MSG_DELTABLE: delete a table (enum nft_table_attributes)
41 * @NFT_MSG_NEWCHAIN: create a new chain (enum nft_chain_attributes)
42 * @NFT_MSG_GETCHAIN: get a chain (enum nft_chain_attributes)
43 * @NFT_MSG_DELCHAIN: delete a chain (enum nft_chain_attributes)
44 * @NFT_MSG_NEWRULE: create a new rule (enum nft_rule_attributes)
45 * @NFT_MSG_GETRULE: get a rule (enum nft_rule_attributes)
46 * @NFT_MSG_DELRULE: delete a rule (enum nft_rule_attributes)
47 * @NFT_MSG_NEWSET: create a new set (enum nft_set_attributes)
48 * @NFT_MSG_GETSET: get a set (enum nft_set_attributes)
49 * @NFT_MSG_DELSET: delete a set (enum nft_set_attributes)
50 * @NFT_MSG_NEWSETELEM: create a new set element (enum nft_set_elem_attributes)
51 * @NFT_MSG_GETSETELEM: get a set element (enum nft_set_elem_attributes)
52 * @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
53 */
54enum nf_tables_msg_types {
55 NFT_MSG_NEWTABLE,
56 NFT_MSG_GETTABLE,
57 NFT_MSG_DELTABLE,
58 NFT_MSG_NEWCHAIN,
59 NFT_MSG_GETCHAIN,
60 NFT_MSG_DELCHAIN,
61 NFT_MSG_NEWRULE,
62 NFT_MSG_GETRULE,
63 NFT_MSG_DELRULE,
64 NFT_MSG_NEWSET,
65 NFT_MSG_GETSET,
66 NFT_MSG_DELSET,
67 NFT_MSG_NEWSETELEM,
68 NFT_MSG_GETSETELEM,
69 NFT_MSG_DELSETELEM,
70 NFT_MSG_MAX,
71};
72
73/**
74 * enum nft_list_attributes - nf_tables generic list netlink attributes
75 *
76 * @NFTA_LIST_ELEM: list element (NLA_NESTED)
77 */
78enum nft_list_attributes {
79 NFTA_LIST_UNPEC,
80 NFTA_LIST_ELEM,
81 __NFTA_LIST_MAX
82};
83#define NFTA_LIST_MAX (__NFTA_LIST_MAX - 1)
84
85/**
86 * enum nft_hook_attributes - nf_tables netfilter hook netlink attributes
87 *
88 * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
89 * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
90 */
91enum nft_hook_attributes {
92 NFTA_HOOK_UNSPEC,
93 NFTA_HOOK_HOOKNUM,
94 NFTA_HOOK_PRIORITY,
95 __NFTA_HOOK_MAX
96};
97#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1)
98
99/**
100 * enum nft_table_flags - nf_tables table flags
101 *
102 * @NFT_TABLE_F_DORMANT: this table is not active
103 */
104enum nft_table_flags {
105 NFT_TABLE_F_DORMANT = 0x1,
106};
107
108/**
109 * enum nft_table_attributes - nf_tables table netlink attributes
110 *
111 * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
112 * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
113 */
114enum nft_table_attributes {
115 NFTA_TABLE_UNSPEC,
116 NFTA_TABLE_NAME,
117 NFTA_TABLE_FLAGS,
118 __NFTA_TABLE_MAX
119};
120#define NFTA_TABLE_MAX (__NFTA_TABLE_MAX - 1)
121
122/**
123 * enum nft_chain_attributes - nf_tables chain netlink attributes
124 *
125 * @NFTA_CHAIN_TABLE: name of the table containing the chain (NLA_STRING)
126 * @NFTA_CHAIN_HANDLE: numeric handle of the chain (NLA_U64)
127 * @NFTA_CHAIN_NAME: name of the chain (NLA_STRING)
128 * @NFTA_CHAIN_HOOK: hook specification for basechains (NLA_NESTED: nft_hook_attributes)
129 * @NFTA_CHAIN_POLICY: numeric policy of the chain (NLA_U32)
130 * @NFTA_CHAIN_USE: number of references to this chain (NLA_U32)
131 * @NFTA_CHAIN_TYPE: chain type name (NLA_NUL_STRING)
132 * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes)
133 */
134enum nft_chain_attributes {
135 NFTA_CHAIN_UNSPEC,
136 NFTA_CHAIN_TABLE,
137 NFTA_CHAIN_HANDLE,
138 NFTA_CHAIN_NAME,
139 NFTA_CHAIN_HOOK,
140 NFTA_CHAIN_POLICY,
141 NFTA_CHAIN_USE,
142 NFTA_CHAIN_TYPE,
143 NFTA_CHAIN_COUNTERS,
144 __NFTA_CHAIN_MAX
145};
146#define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1)
147
148/**
149 * enum nft_rule_attributes - nf_tables rule netlink attributes
150 *
151 * @NFTA_RULE_TABLE: name of the table containing the rule (NLA_STRING)
152 * @NFTA_RULE_CHAIN: name of the chain containing the rule (NLA_STRING)
153 * @NFTA_RULE_HANDLE: numeric handle of the rule (NLA_U64)
154 * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
155 * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes)
156 * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64)
157 */
158enum nft_rule_attributes {
159 NFTA_RULE_UNSPEC,
160 NFTA_RULE_TABLE,
161 NFTA_RULE_CHAIN,
162 NFTA_RULE_HANDLE,
163 NFTA_RULE_EXPRESSIONS,
164 NFTA_RULE_COMPAT,
165 NFTA_RULE_POSITION,
166 __NFTA_RULE_MAX
167};
168#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1)
169
170/**
171 * enum nft_rule_compat_flags - nf_tables rule compat flags
172 *
173 * @NFT_RULE_COMPAT_F_INV: invert the check result
174 */
175enum nft_rule_compat_flags {
176 NFT_RULE_COMPAT_F_INV = (1 << 1),
177 NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV,
178};
179
180/**
181 * enum nft_rule_compat_attributes - nf_tables rule compat attributes
182 *
183 * @NFTA_RULE_COMPAT_PROTO: numeric value of handled protocol (NLA_U32)
184 * @NFTA_RULE_COMPAT_FLAGS: bitmask of enum nft_rule_compat_flags (NLA_U32)
185 */
186enum nft_rule_compat_attributes {
187 NFTA_RULE_COMPAT_UNSPEC,
188 NFTA_RULE_COMPAT_PROTO,
189 NFTA_RULE_COMPAT_FLAGS,
190 __NFTA_RULE_COMPAT_MAX
191};
192#define NFTA_RULE_COMPAT_MAX (__NFTA_RULE_COMPAT_MAX - 1)
193
194/**
195 * enum nft_set_flags - nf_tables set flags
196 *
197 * @NFT_SET_ANONYMOUS: name allocation, automatic cleanup on unlink
198 * @NFT_SET_CONSTANT: set contents may not change while bound
199 * @NFT_SET_INTERVAL: set contains intervals
200 * @NFT_SET_MAP: set is used as a dictionary
201 */
202enum nft_set_flags {
203 NFT_SET_ANONYMOUS = 0x1,
204 NFT_SET_CONSTANT = 0x2,
205 NFT_SET_INTERVAL = 0x4,
206 NFT_SET_MAP = 0x8,
207};
208
209/**
210 * enum nft_set_attributes - nf_tables set netlink attributes
211 *
212 * @NFTA_SET_TABLE: table name (NLA_STRING)
213 * @NFTA_SET_NAME: set name (NLA_STRING)
214 * @NFTA_SET_FLAGS: bitmask of enum nft_set_flags (NLA_U32)
215 * @NFTA_SET_KEY_TYPE: key data type, informational purpose only (NLA_U32)
216 * @NFTA_SET_KEY_LEN: key data length (NLA_U32)
217 * @NFTA_SET_DATA_TYPE: mapping data type (NLA_U32)
218 * @NFTA_SET_DATA_LEN: mapping data length (NLA_U32)
219 */
220enum nft_set_attributes {
221 NFTA_SET_UNSPEC,
222 NFTA_SET_TABLE,
223 NFTA_SET_NAME,
224 NFTA_SET_FLAGS,
225 NFTA_SET_KEY_TYPE,
226 NFTA_SET_KEY_LEN,
227 NFTA_SET_DATA_TYPE,
228 NFTA_SET_DATA_LEN,
229 __NFTA_SET_MAX
230};
231#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
232
233/**
234 * enum nft_set_elem_flags - nf_tables set element flags
235 *
236 * @NFT_SET_ELEM_INTERVAL_END: element ends the previous interval
237 */
238enum nft_set_elem_flags {
239 NFT_SET_ELEM_INTERVAL_END = 0x1,
240};
241
242/**
243 * enum nft_set_elem_attributes - nf_tables set element netlink attributes
244 *
245 * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data)
246 * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes)
247 * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
248 */
249enum nft_set_elem_attributes {
250 NFTA_SET_ELEM_UNSPEC,
251 NFTA_SET_ELEM_KEY,
252 NFTA_SET_ELEM_DATA,
253 NFTA_SET_ELEM_FLAGS,
254 __NFTA_SET_ELEM_MAX
255};
256#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1)
257
258/**
259 * enum nft_set_elem_list_attributes - nf_tables set element list netlink attributes
260 *
261 * @NFTA_SET_ELEM_LIST_TABLE: table of the set to be changed (NLA_STRING)
262 * @NFTA_SET_ELEM_LIST_SET: name of the set to be changed (NLA_STRING)
263 * @NFTA_SET_ELEM_LIST_ELEMENTS: list of set elements (NLA_NESTED: nft_set_elem_attributes)
264 */
265enum nft_set_elem_list_attributes {
266 NFTA_SET_ELEM_LIST_UNSPEC,
267 NFTA_SET_ELEM_LIST_TABLE,
268 NFTA_SET_ELEM_LIST_SET,
269 NFTA_SET_ELEM_LIST_ELEMENTS,
270 __NFTA_SET_ELEM_LIST_MAX
271};
272#define NFTA_SET_ELEM_LIST_MAX (__NFTA_SET_ELEM_LIST_MAX - 1)
273
274/**
275 * enum nft_data_types - nf_tables data types
276 *
277 * @NFT_DATA_VALUE: generic data
278 * @NFT_DATA_VERDICT: netfilter verdict
279 *
280 * The type of data is usually determined by the kernel directly and is not
281 * explicitly specified by userspace. The only exception is sets, where
282 * userspace specifies the key and mapping data types.
283 *
284 * The values 0xffffff00-0xffffffff are reserved for internally used types.
285 * The remaining range can be freely used by userspace to encode types, all
286 * values are equivalent to NFT_DATA_VALUE.
287 */
288enum nft_data_types {
289 NFT_DATA_VALUE,
290 NFT_DATA_VERDICT = 0xffffff00U,
291};
292
293#define NFT_DATA_RESERVED_MASK 0xffffff00U
294
295/**
296 * enum nft_data_attributes - nf_tables data netlink attributes
297 *
298 * @NFTA_DATA_VALUE: generic data (NLA_BINARY)
299 * @NFTA_DATA_VERDICT: nf_tables verdict (NLA_NESTED: nft_verdict_attributes)
300 */
301enum nft_data_attributes {
302 NFTA_DATA_UNSPEC,
303 NFTA_DATA_VALUE,
304 NFTA_DATA_VERDICT,
305 __NFTA_DATA_MAX
306};
307#define NFTA_DATA_MAX (__NFTA_DATA_MAX - 1)
308
309/**
310 * enum nft_verdict_attributes - nf_tables verdict netlink attributes
311 *
312 * @NFTA_VERDICT_CODE: nf_tables verdict (NLA_U32: enum nft_verdicts)
313 * @NFTA_VERDICT_CHAIN: jump target chain name (NLA_STRING)
314 */
315enum nft_verdict_attributes {
316 NFTA_VERDICT_UNSPEC,
317 NFTA_VERDICT_CODE,
318 NFTA_VERDICT_CHAIN,
319 __NFTA_VERDICT_MAX
320};
321#define NFTA_VERDICT_MAX (__NFTA_VERDICT_MAX - 1)
322
323/**
324 * enum nft_expr_attributes - nf_tables expression netlink attributes
325 *
326 * @NFTA_EXPR_NAME: name of the expression type (NLA_STRING)
327 * @NFTA_EXPR_DATA: type specific data (NLA_NESTED)
328 */
329enum nft_expr_attributes {
330 NFTA_EXPR_UNSPEC,
331 NFTA_EXPR_NAME,
332 NFTA_EXPR_DATA,
333 __NFTA_EXPR_MAX
334};
335#define NFTA_EXPR_MAX (__NFTA_EXPR_MAX - 1)
336
337/**
338 * enum nft_immediate_attributes - nf_tables immediate expression netlink attributes
339 *
340 * @NFTA_IMMEDIATE_DREG: destination register to load data into (NLA_U32)
341 * @NFTA_IMMEDIATE_DATA: data to load (NLA_NESTED: nft_data_attributes)
342 */
343enum nft_immediate_attributes {
344 NFTA_IMMEDIATE_UNSPEC,
345 NFTA_IMMEDIATE_DREG,
346 NFTA_IMMEDIATE_DATA,
347 __NFTA_IMMEDIATE_MAX
348};
349#define NFTA_IMMEDIATE_MAX (__NFTA_IMMEDIATE_MAX - 1)
350
351/**
352 * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes
353 *
354 * @NFTA_BITWISE_SREG: source register (NLA_U32: nft_registers)
355 * @NFTA_BITWISE_DREG: destination register (NLA_U32: nft_registers)
356 * @NFTA_BITWISE_LEN: length of operands (NLA_U32)
357 * @NFTA_BITWISE_MASK: mask value (NLA_NESTED: nft_data_attributes)
358 * @NFTA_BITWISE_XOR: xor value (NLA_NESTED: nft_data_attributes)
359 *
360 * The bitwise expression performs the following operation:
361 *
362 * dreg = (sreg & mask) ^ xor
363 *
364 * which allows expressing all bitwise operations:
365 *
366 * mask xor
367 * NOT: 1 1
368 * OR: 0 x
369 * XOR: 1 x
370 * AND: x 0
371 */
372enum nft_bitwise_attributes {
373 NFTA_BITWISE_UNSPEC,
374 NFTA_BITWISE_SREG,
375 NFTA_BITWISE_DREG,
376 NFTA_BITWISE_LEN,
377 NFTA_BITWISE_MASK,
378 NFTA_BITWISE_XOR,
379 __NFTA_BITWISE_MAX
380};
381#define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1)
382
383/**
384 * enum nft_byteorder_ops - nf_tables byteorder operators
385 *
386 * @NFT_BYTEORDER_NTOH: network to host operator
387 * @NFT_BYTEORDER_HTON: host to network operator
388 */
389enum nft_byteorder_ops {
390 NFT_BYTEORDER_NTOH,
391 NFT_BYTEORDER_HTON,
392};
393
394/**
395 * enum nft_byteorder_attributes - nf_tables byteorder expression netlink attributes
396 *
397 * @NFTA_BYTEORDER_SREG: source register (NLA_U32: nft_registers)
398 * @NFTA_BYTEORDER_DREG: destination register (NLA_U32: nft_registers)
399 * @NFTA_BYTEORDER_OP: operator (NLA_U32: enum nft_byteorder_ops)
400 * @NFTA_BYTEORDER_LEN: length of the data (NLA_U32)
401 * @NFTA_BYTEORDER_SIZE: data size in bytes (NLA_U32: 2 or 4)
402 */
403enum nft_byteorder_attributes {
404 NFTA_BYTEORDER_UNSPEC,
405 NFTA_BYTEORDER_SREG,
406 NFTA_BYTEORDER_DREG,
407 NFTA_BYTEORDER_OP,
408 NFTA_BYTEORDER_LEN,
409 NFTA_BYTEORDER_SIZE,
410 __NFTA_BYTEORDER_MAX
411};
412#define NFTA_BYTEORDER_MAX (__NFTA_BYTEORDER_MAX - 1)
413
414/**
415 * enum nft_cmp_ops - nf_tables relational operator
416 *
417 * @NFT_CMP_EQ: equal
418 * @NFT_CMP_NEQ: not equal
419 * @NFT_CMP_LT: less than
420 * @NFT_CMP_LTE: less than or equal to
421 * @NFT_CMP_GT: greater than
422 * @NFT_CMP_GTE: greater than or equal to
423 */
424enum nft_cmp_ops {
425 NFT_CMP_EQ,
426 NFT_CMP_NEQ,
427 NFT_CMP_LT,
428 NFT_CMP_LTE,
429 NFT_CMP_GT,
430 NFT_CMP_GTE,
431};
432
433/**
434 * enum nft_cmp_attributes - nf_tables cmp expression netlink attributes
435 *
436 * @NFTA_CMP_SREG: source register of data to compare (NLA_U32: nft_registers)
437 * @NFTA_CMP_OP: cmp operation (NLA_U32: nft_cmp_ops)
438 * @NFTA_CMP_DATA: data to compare against (NLA_NESTED: nft_data_attributes)
439 */
440enum nft_cmp_attributes {
441 NFTA_CMP_UNSPEC,
442 NFTA_CMP_SREG,
443 NFTA_CMP_OP,
444 NFTA_CMP_DATA,
445 __NFTA_CMP_MAX
446};
447#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1)
448
449/**
450 * enum nft_lookup_attributes - nf_tables set lookup expression netlink attributes
451 *
452 * @NFTA_LOOKUP_SET: name of the set where to look for (NLA_STRING)
453 * @NFTA_LOOKUP_SREG: source register of the data to look for (NLA_U32: nft_registers)
454 * @NFTA_LOOKUP_DREG: destination register (NLA_U32: nft_registers)
455 */
456enum nft_lookup_attributes {
457 NFTA_LOOKUP_UNSPEC,
458 NFTA_LOOKUP_SET,
459 NFTA_LOOKUP_SREG,
460 NFTA_LOOKUP_DREG,
461 __NFTA_LOOKUP_MAX
462};
463#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1)
464
465/**
466 * enum nft_payload_bases - nf_tables payload expression offset bases
467 *
468 * @NFT_PAYLOAD_LL_HEADER: link layer header
469 * @NFT_PAYLOAD_NETWORK_HEADER: network header
470 * @NFT_PAYLOAD_TRANSPORT_HEADER: transport header
471 */
472enum nft_payload_bases {
473 NFT_PAYLOAD_LL_HEADER,
474 NFT_PAYLOAD_NETWORK_HEADER,
475 NFT_PAYLOAD_TRANSPORT_HEADER,
476};
477
478/**
479 * enum nft_payload_attributes - nf_tables payload expression netlink attributes
480 *
481 * @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers)
482 * @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases)
483 * @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32)
484 * @NFTA_PAYLOAD_LEN: payload length (NLA_U32)
485 */
486enum nft_payload_attributes {
487 NFTA_PAYLOAD_UNSPEC,
488 NFTA_PAYLOAD_DREG,
489 NFTA_PAYLOAD_BASE,
490 NFTA_PAYLOAD_OFFSET,
491 NFTA_PAYLOAD_LEN,
492 __NFTA_PAYLOAD_MAX
493};
494#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1)
495
496/**
497 * enum nft_exthdr_attributes - nf_tables IPv6 extension header expression netlink attributes
498 *
499 * @NFTA_EXTHDR_DREG: destination register (NLA_U32: nft_registers)
500 * @NFTA_EXTHDR_TYPE: extension header type (NLA_U8)
501 * @NFTA_EXTHDR_OFFSET: extension header offset (NLA_U32)
502 * @NFTA_EXTHDR_LEN: extension header length (NLA_U32)
503 */
504enum nft_exthdr_attributes {
505 NFTA_EXTHDR_UNSPEC,
506 NFTA_EXTHDR_DREG,
507 NFTA_EXTHDR_TYPE,
508 NFTA_EXTHDR_OFFSET,
509 NFTA_EXTHDR_LEN,
510 __NFTA_EXTHDR_MAX
511};
512#define NFTA_EXTHDR_MAX (__NFTA_EXTHDR_MAX - 1)
513
514/**
515 * enum nft_meta_keys - nf_tables meta expression keys
516 *
517 * @NFT_META_LEN: packet length (skb->len)
518 * @NFT_META_PROTOCOL: packet ethertype protocol (skb->protocol), invalid in OUTPUT
519 * @NFT_META_PRIORITY: packet priority (skb->priority)
520 * @NFT_META_MARK: packet mark (skb->mark)
521 * @NFT_META_IIF: packet input interface index (dev->ifindex)
522 * @NFT_META_OIF: packet output interface index (dev->ifindex)
523 * @NFT_META_IIFNAME: packet input interface name (dev->name)
524 * @NFT_META_OIFNAME: packet output interface name (dev->name)
525 * @NFT_META_IIFTYPE: packet input interface type (dev->type)
526 * @NFT_META_OIFTYPE: packet output interface type (dev->type)
527 * @NFT_META_SKUID: originating socket UID (fsuid)
528 * @NFT_META_SKGID: originating socket GID (fsgid)
529 * @NFT_META_NFTRACE: packet nftrace bit
530 * @NFT_META_RTCLASSID: realm value of packet's route (skb->dst->tclassid)
531 * @NFT_META_SECMARK: packet secmark (skb->secmark)
532 */
533enum nft_meta_keys {
534 NFT_META_LEN,
535 NFT_META_PROTOCOL,
536 NFT_META_PRIORITY,
537 NFT_META_MARK,
538 NFT_META_IIF,
539 NFT_META_OIF,
540 NFT_META_IIFNAME,
541 NFT_META_OIFNAME,
542 NFT_META_IIFTYPE,
543 NFT_META_OIFTYPE,
544 NFT_META_SKUID,
545 NFT_META_SKGID,
546 NFT_META_NFTRACE,
547 NFT_META_RTCLASSID,
548 NFT_META_SECMARK,
549};
550
551/**
552 * enum nft_meta_attributes - nf_tables meta expression netlink attributes
553 *
554 * @NFTA_META_DREG: destination register (NLA_U32)
555 * @NFTA_META_KEY: meta data item to load (NLA_U32: nft_meta_keys)
556 */
557enum nft_meta_attributes {
558 NFTA_META_UNSPEC,
559 NFTA_META_DREG,
560 NFTA_META_KEY,
561 __NFTA_META_MAX
562};
563#define NFTA_META_MAX (__NFTA_META_MAX - 1)
564
565/**
566 * enum nft_ct_keys - nf_tables ct expression keys
567 *
568 * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info)
569 * @NFT_CT_DIRECTION: conntrack direction (enum ip_conntrack_dir)
570 * @NFT_CT_STATUS: conntrack status (bitmask of enum ip_conntrack_status)
571 * @NFT_CT_MARK: conntrack mark value
572 * @NFT_CT_SECMARK: conntrack secmark value
573 * @NFT_CT_EXPIRATION: relative conntrack expiration time in ms
574 * @NFT_CT_HELPER: connection tracking helper assigned to conntrack
575 * @NFT_CT_L3PROTOCOL: conntrack layer 3 protocol
576 * @NFT_CT_SRC: conntrack layer 3 protocol source (IPv4/IPv6 address)
577 * @NFT_CT_DST: conntrack layer 3 protocol destination (IPv4/IPv6 address)
578 * @NFT_CT_PROTOCOL: conntrack layer 4 protocol
579 * @NFT_CT_PROTO_SRC: conntrack layer 4 protocol source
580 * @NFT_CT_PROTO_DST: conntrack layer 4 protocol destination
581 */
582enum nft_ct_keys {
583 NFT_CT_STATE,
584 NFT_CT_DIRECTION,
585 NFT_CT_STATUS,
586 NFT_CT_MARK,
587 NFT_CT_SECMARK,
588 NFT_CT_EXPIRATION,
589 NFT_CT_HELPER,
590 NFT_CT_L3PROTOCOL,
591 NFT_CT_SRC,
592 NFT_CT_DST,
593 NFT_CT_PROTOCOL,
594 NFT_CT_PROTO_SRC,
595 NFT_CT_PROTO_DST,
596};
597
598/**
599 * enum nft_ct_attributes - nf_tables ct expression netlink attributes
600 *
601 * @NFTA_CT_DREG: destination register (NLA_U32)
602 * @NFTA_CT_KEY: conntrack data item to load (NLA_U32: nft_ct_keys)
603 * @NFTA_CT_DIRECTION: direction in case of directional keys (NLA_U8)
604 */
605enum nft_ct_attributes {
606 NFTA_CT_UNSPEC,
607 NFTA_CT_DREG,
608 NFTA_CT_KEY,
609 NFTA_CT_DIRECTION,
610 __NFTA_CT_MAX
611};
612#define NFTA_CT_MAX (__NFTA_CT_MAX - 1)
613
614/**
615 * enum nft_limit_attributes - nf_tables limit expression netlink attributes
616 *
617 * @NFTA_LIMIT_RATE: refill rate (NLA_U64)
618 * @NFTA_LIMIT_UNIT: refill unit (NLA_U64)
619 */
620enum nft_limit_attributes {
621 NFTA_LIMIT_UNSPEC,
622 NFTA_LIMIT_RATE,
623 NFTA_LIMIT_UNIT,
624 __NFTA_LIMIT_MAX
625};
626#define NFTA_LIMIT_MAX (__NFTA_LIMIT_MAX - 1)
627
628/**
629 * enum nft_counter_attributes - nf_tables counter expression netlink attributes
630 *
631 * @NFTA_COUNTER_BYTES: number of bytes (NLA_U64)
632 * @NFTA_COUNTER_PACKETS: number of packets (NLA_U64)
633 */
634enum nft_counter_attributes {
635 NFTA_COUNTER_UNSPEC,
636 NFTA_COUNTER_BYTES,
637 NFTA_COUNTER_PACKETS,
638 __NFTA_COUNTER_MAX
639};
640#define NFTA_COUNTER_MAX (__NFTA_COUNTER_MAX - 1)
641
642/**
643 * enum nft_log_attributes - nf_tables log expression netlink attributes
644 *
645 * @NFTA_LOG_GROUP: netlink group to send messages to (NLA_U32)
646 * @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING)
647 * @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32)
648 * @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32)
649 */
650enum nft_log_attributes {
651 NFTA_LOG_UNSPEC,
652 NFTA_LOG_GROUP,
653 NFTA_LOG_PREFIX,
654 NFTA_LOG_SNAPLEN,
655 NFTA_LOG_QTHRESHOLD,
656 __NFTA_LOG_MAX
657};
658#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1)
659
660/**
661 * enum nft_reject_types - nf_tables reject expression reject types
662 *
663 * @NFT_REJECT_ICMP_UNREACH: reject using ICMP unreachable
664 * @NFT_REJECT_TCP_RST: reject using TCP RST
665 */
666enum nft_reject_types {
667 NFT_REJECT_ICMP_UNREACH,
668 NFT_REJECT_TCP_RST,
669};
670
671/**
672 * enum nft_reject_attributes - nf_tables reject expression netlink attributes
673 *
674 * @NFTA_REJECT_TYPE: packet type to use (NLA_U32: nft_reject_types)
675 * @NFTA_REJECT_ICMP_CODE: ICMP code to use (NLA_U8)
676 */
677enum nft_reject_attributes {
678 NFTA_REJECT_UNSPEC,
679 NFTA_REJECT_TYPE,
680 NFTA_REJECT_ICMP_CODE,
681 __NFTA_REJECT_MAX
682};
683#define NFTA_REJECT_MAX (__NFTA_REJECT_MAX - 1)
684
685/**
686 * enum nft_nat_types - nf_tables nat expression NAT types
687 *
688 * @NFT_NAT_SNAT: source NAT
689 * @NFT_NAT_DNAT: destination NAT
690 */
691enum nft_nat_types {
692 NFT_NAT_SNAT,
693 NFT_NAT_DNAT,
694};
695
696/**
697 * enum nft_nat_attributes - nf_tables nat expression netlink attributes
698 *
699 * @NFTA_NAT_TYPE: NAT type (NLA_U32: nft_nat_types)
700 * @NFTA_NAT_FAMILY: NAT family (NLA_U32)
701 * @NFTA_NAT_REG_ADDR_MIN: source register of address range start (NLA_U32: nft_registers)
702 * @NFTA_NAT_REG_ADDR_MAX: source register of address range end (NLA_U32: nft_registers)
703 * @NFTA_NAT_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers)
704 * @NFTA_NAT_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers)
705 */
706enum nft_nat_attributes {
707 NFTA_NAT_UNSPEC,
708 NFTA_NAT_TYPE,
709 NFTA_NAT_FAMILY,
710 NFTA_NAT_REG_ADDR_MIN,
711 NFTA_NAT_REG_ADDR_MAX,
712 NFTA_NAT_REG_PROTO_MIN,
713 NFTA_NAT_REG_PROTO_MAX,
714 __NFTA_NAT_MAX
715};
716#define NFTA_NAT_MAX (__NFTA_NAT_MAX - 1)
717
718#endif /* _LINUX_NF_TABLES_H */
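The mask/xor table in the bitwise comment above is easiest to read as concrete parameter choices for dreg = (sreg & mask) ^ xor. A stand-alone userspace check of those choices, derived from the formula alone (none of this code is part of the patch):

#include <assert.h>
#include <stdint.h>

/* the single primitive used by the nft bitwise expression */
static uint32_t bitwise_eval(uint32_t sreg, uint32_t mask, uint32_t xor)
{
	return (sreg & mask) ^ xor;
}

int main(void)
{
	uint32_t s = 0x1234abcd, m = 0x00ff00ff;

	assert(bitwise_eval(s, m, 0)      == (s & m));	/* AND: mask=m,  xor=0  */
	assert(bitwise_eval(s, ~0u, m)    == (s ^ m));	/* XOR: mask=~0, xor=m  */
	assert(bitwise_eval(s, ~0u, ~0u)  == ~s);	/* NOT: mask=~0, xor=~0 */
	assert(bitwise_eval(s, ~m, m)     == (s | m));	/* OR:  mask=~m, xor=m  */
	return 0;
}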
diff --git a/include/uapi/linux/netfilter/nf_tables_compat.h b/include/uapi/linux/netfilter/nf_tables_compat.h
new file mode 100644
index 000000000000..8310f5f76551
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_tables_compat.h
@@ -0,0 +1,38 @@
1#ifndef _NFT_COMPAT_NFNETLINK_H_
2#define _NFT_COMPAT_NFNETLINK_H_
3
4enum nft_target_attributes {
5 NFTA_TARGET_UNSPEC,
6 NFTA_TARGET_NAME,
7 NFTA_TARGET_REV,
8 NFTA_TARGET_INFO,
9 __NFTA_TARGET_MAX
10};
11#define NFTA_TARGET_MAX (__NFTA_TARGET_MAX - 1)
12
13enum nft_match_attributes {
14 NFTA_MATCH_UNSPEC,
15 NFTA_MATCH_NAME,
16 NFTA_MATCH_REV,
17 NFTA_MATCH_INFO,
18 __NFTA_MATCH_MAX
19};
20#define NFTA_MATCH_MAX (__NFTA_MATCH_MAX - 1)
21
22#define NFT_COMPAT_NAME_MAX 32
23
24enum {
25 NFNL_MSG_COMPAT_GET,
26 NFNL_MSG_COMPAT_MAX
27};
28
29enum {
30 NFTA_COMPAT_UNSPEC = 0,
31 NFTA_COMPAT_NAME,
32 NFTA_COMPAT_REV,
33 NFTA_COMPAT_TYPE,
34 __NFTA_COMPAT_MAX,
35};
36#define NFTA_COMPAT_MAX (__NFTA_COMPAT_MAX - 1)
37
38#endif
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 4a4efafad5f4..596ddd45253c 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -18,6 +18,8 @@ enum nfnetlink_groups {
18#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE 18#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE
19 NFNLGRP_CONNTRACK_EXP_DESTROY, 19 NFNLGRP_CONNTRACK_EXP_DESTROY,
20#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY 20#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY
21 NFNLGRP_NFTABLES,
22#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES
21 __NFNLGRP_MAX, 23 __NFNLGRP_MAX,
22}; 24};
23#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) 25#define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
@@ -51,6 +53,12 @@ struct nfgenmsg {
51#define NFNL_SUBSYS_ACCT 7 53#define NFNL_SUBSYS_ACCT 7
52#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 54#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8
53#define NFNL_SUBSYS_CTHELPER 9 55#define NFNL_SUBSYS_CTHELPER 9
54#define NFNL_SUBSYS_COUNT 10 56#define NFNL_SUBSYS_NFTABLES 10
57#define NFNL_SUBSYS_NFT_COMPAT 11
58#define NFNL_SUBSYS_COUNT 12
59
60/* Reserved control nfnetlink messages */
61#define NFNL_MSG_BATCH_BEGIN NLMSG_MIN_TYPE
62#define NFNL_MSG_BATCH_END NLMSG_MIN_TYPE+1
55 63
56#endif /* _UAPI_NFNETLINK_H */ 64#endif /* _UAPI_NFNETLINK_H */
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index eb262e3324d2..c50061db6098 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -51,6 +51,20 @@
51 */ 51 */
52 52
53/* 53/*
54 * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
55 * offload off or on. If it is missing then the feature is assumed to be on.
56 * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
57 * offload on or off. If it is missing then the feature is assumed to be off.
58 */
59
60/*
61 * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
62 * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
63 * frontends nor backends are assumed to be capable unless the flags are
64 * present.
65 */
66
67/*
54 * This is the 'wire' format for packets: 68 * This is the 'wire' format for packets:
55 * Request 1: xen_netif_tx_request -- XEN_NETTXF_* (any flags) 69 * Request 1: xen_netif_tx_request -- XEN_NETTXF_* (any flags)
56 * [Request 2: xen_netif_extra_info] (only if request 1 has XEN_NETTXF_extra_info) 70 * [Request 2: xen_netif_extra_info] (only if request 1 has XEN_NETTXF_extra_info)
@@ -95,8 +109,10 @@ struct xen_netif_tx_request {
95#define _XEN_NETIF_EXTRA_FLAG_MORE (0) 109#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
96#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) 110#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
97 111
98/* GSO types - only TCPv4 currently supported. */ 112/* GSO types */
113#define XEN_NETIF_GSO_TYPE_NONE (0)
99#define XEN_NETIF_GSO_TYPE_TCPV4 (1) 114#define XEN_NETIF_GSO_TYPE_TCPV4 (1)
115#define XEN_NETIF_GSO_TYPE_TCPV6 (2)
100 116
101/* 117/*
102 * This structure needs to fit within both netif_tx_request and 118 * This structure needs to fit within both netif_tx_request and
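The new keys follow the usual xenbus feature negotiation: each end reads the other end's key with xenbus_scanf() and applies the documented default when the key is absent, exactly as the backend does for "feature-ipv6-csum-offload" earlier in this patch. A condensed sketch of that pattern; the helper name is made up:

#include <linux/types.h>
#include <xen/xenbus.h>

/* Read an optional boolean feature key from the other end of the
 * device, falling back to a default when the key is missing.
 */
static bool example_read_feature(struct xenbus_device *dev,
				 const char *key, bool def)
{
	int val;

	if (xenbus_scanf(XBT_NIL, dev->otherend, key, "%d", &val) < 0)
		return def;

	return !!val;
}

/* usage, matching the backend code above:
 *	vif->ipv6_csum = example_read_feature(dev, "feature-ipv6-csum-offload", false);
 */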
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 8ddbfe66d637..4f4aabbd8eab 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -24,6 +24,7 @@ batman-adv-y += bitarray.o
24batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o 24batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
25batman-adv-y += debugfs.o 25batman-adv-y += debugfs.o
26batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o 26batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
27batman-adv-y += fragmentation.o
27batman-adv-y += gateway_client.o 28batman-adv-y += gateway_client.o
28batman-adv-y += gateway_common.o 29batman-adv-y += gateway_common.o
29batman-adv-y += hard-interface.o 30batman-adv-y += hard-interface.o
@@ -37,4 +38,3 @@ batman-adv-y += send.o
37batman-adv-y += soft-interface.o 38batman-adv-y += soft-interface.o
38batman-adv-y += sysfs.o 39batman-adv-y += sysfs.o
39batman-adv-y += translation-table.o 40batman-adv-y += translation-table.o
40batman-adv-y += unicast.o
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 264de88db320..5bb58d7bdd56 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -863,25 +863,25 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv,
863 struct arphdr *arphdr; 863 struct arphdr *arphdr;
864 uint8_t *hw_src, *hw_dst; 864 uint8_t *hw_src, *hw_dst;
865 struct batadv_bla_claim_dst *bla_dst; 865 struct batadv_bla_claim_dst *bla_dst;
866 uint16_t proto; 866 __be16 proto;
867 int headlen; 867 int headlen;
868 unsigned short vid = BATADV_NO_FLAGS; 868 unsigned short vid = BATADV_NO_FLAGS;
869 int ret; 869 int ret;
870 870
871 ethhdr = eth_hdr(skb); 871 ethhdr = eth_hdr(skb);
872 872
873 if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { 873 if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
874 vhdr = (struct vlan_ethhdr *)ethhdr; 874 vhdr = (struct vlan_ethhdr *)ethhdr;
875 vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; 875 vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK;
876 vid |= BATADV_VLAN_HAS_TAG; 876 vid |= BATADV_VLAN_HAS_TAG;
877 proto = ntohs(vhdr->h_vlan_encapsulated_proto); 877 proto = vhdr->h_vlan_encapsulated_proto;
878 headlen = sizeof(*vhdr); 878 headlen = sizeof(*vhdr);
879 } else { 879 } else {
880 proto = ntohs(ethhdr->h_proto); 880 proto = ethhdr->h_proto;
881 headlen = ETH_HLEN; 881 headlen = ETH_HLEN;
882 } 882 }
883 883
884 if (proto != ETH_P_ARP) 884 if (proto != htons(ETH_P_ARP))
885 return 0; /* not a claim frame */ 885 return 0; /* not a claim frame */
886 886
887 /* this must be a ARP frame. check if it is a claim. */ 887 /* this must be a ARP frame. check if it is a claim. */
@@ -1379,8 +1379,8 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb,
1379 1379
1380 ethhdr = (struct ethhdr *)(((uint8_t *)skb->data) + hdr_size); 1380 ethhdr = (struct ethhdr *)(((uint8_t *)skb->data) + hdr_size);
1381 1381
1382 if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { 1382 if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
1383 if (!pskb_may_pull(skb, hdr_size + sizeof(struct vlan_ethhdr))) 1383 if (!pskb_may_pull(skb, hdr_size + VLAN_ETH_HLEN))
1384 return 0; 1384 return 0;
1385 1385
1386 vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); 1386 vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index f07ec320dc01..99da41290f82 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -29,7 +29,6 @@
29#include "send.h" 29#include "send.h"
30#include "types.h" 30#include "types.h"
31#include "translation-table.h" 31#include "translation-table.h"
32#include "unicast.h"
33 32
34static void batadv_dat_purge(struct work_struct *work); 33static void batadv_dat_purge(struct work_struct *work);
35 34
@@ -592,9 +591,9 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
592 goto free_orig; 591 goto free_orig;
593 592
594 tmp_skb = pskb_copy(skb, GFP_ATOMIC); 593 tmp_skb = pskb_copy(skb, GFP_ATOMIC);
595 if (!batadv_unicast_4addr_prepare_skb(bat_priv, tmp_skb, 594 if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
596 cand[i].orig_node, 595 cand[i].orig_node,
597 packet_subtype)) { 596 packet_subtype)) {
598 kfree_skb(tmp_skb); 597 kfree_skb(tmp_skb);
599 goto free_neigh; 598 goto free_neigh;
600 } 599 }
@@ -990,10 +989,10 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
990 * that a node not using the 4addr packet format doesn't support it. 989 * that a node not using the 4addr packet format doesn't support it.
991 */ 990 */
992 if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) 991 if (hdr_size == sizeof(struct batadv_unicast_4addr_packet))
993 err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, 992 err = batadv_send_skb_unicast_4addr(bat_priv, skb_new,
994 BATADV_P_DAT_CACHE_REPLY); 993 BATADV_P_DAT_CACHE_REPLY);
995 else 994 else
996 err = batadv_unicast_send_skb(bat_priv, skb_new); 995 err = batadv_send_skb_unicast(bat_priv, skb_new);
997 996
998 if (!err) { 997 if (!err) {
999 batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX); 998 batadv_inc_counter(bat_priv, BATADV_CNT_DAT_CACHED_REPLY_TX);
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
new file mode 100644
index 000000000000..271d321b3a04
--- /dev/null
+++ b/net/batman-adv/fragmentation.c
@@ -0,0 +1,491 @@
1/* Copyright (C) 2013 B.A.T.M.A.N. contributors:
2 *
3 * Martin Hundebøll <martin@hundeboll.net>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#include "main.h"
21#include "fragmentation.h"
22#include "send.h"
23#include "originator.h"
24#include "routing.h"
25#include "hard-interface.h"
26#include "soft-interface.h"
27
28
29/**
30 * batadv_frag_clear_chain - delete entries in the fragment buffer chain
31 * @head: head of chain with entries.
32 *
33 * Free fragments in the passed hlist. Should be called with appropriate lock.
34 */
35static void batadv_frag_clear_chain(struct hlist_head *head)
36{
37 struct batadv_frag_list_entry *entry;
38 struct hlist_node *node;
39
40 hlist_for_each_entry_safe(entry, node, head, list) {
41 hlist_del(&entry->list);
42 kfree_skb(entry->skb);
43 kfree(entry);
44 }
45}
46
47/**
48 * batadv_frag_purge_orig - free fragments associated to an orig
49 * @orig_node: originator to free fragments from
50 * @check_cb: optional function to tell if an entry should be purged
51 */
52void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
53 bool (*check_cb)(struct batadv_frag_table_entry *))
54{
55 struct batadv_frag_table_entry *chain;
56 uint8_t i;
57
58 for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
59 chain = &orig_node->fragments[i];
60 spin_lock_bh(&orig_node->fragments[i].lock);
61
62 if (!check_cb || check_cb(chain)) {
63 batadv_frag_clear_chain(&orig_node->fragments[i].head);
64 orig_node->fragments[i].size = 0;
65 }
66
67 spin_unlock_bh(&orig_node->fragments[i].lock);
68 }
69}
70
71/**
72 * batadv_frag_size_limit - maximum possible size of packet to be fragmented
73 *
74 * Returns the maximum size of payload that can be fragmented.
75 */
76static int batadv_frag_size_limit(void)
77{
78 int limit = BATADV_FRAG_MAX_FRAG_SIZE;
79
80 limit -= sizeof(struct batadv_frag_packet);
81 limit *= BATADV_FRAG_MAX_FRAGMENTS;
82
83 return limit;
84}
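batadv_frag_size_limit() bounds the payload that may ever be split: the usable bytes per fragment times the maximum number of fragments. A small standalone sketch of the arithmetic, assuming a 20-byte struct batadv_frag_packet (3-byte batadv_header, one flags byte, two 6-byte MAC addresses, two __be16 fields); the kernel code uses sizeof() instead of a hard-coded value:

#include <stdio.h>

#define BATADV_FRAG_MAX_FRAG_SIZE 1400	/* mirrored from main.h below */
#define BATADV_FRAG_MAX_FRAGMENTS 16

int main(void)
{
	int frag_hdr_len = 20;	/* assumed sizeof(struct batadv_frag_packet) */
	int limit = (BATADV_FRAG_MAX_FRAG_SIZE - frag_hdr_len)
		    * BATADV_FRAG_MAX_FRAGMENTS;

	/* prints 22080 with the assumed header size */
	printf("max payload that can be fragmented: %d bytes\n", limit);
	return 0;
}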
85
86/**
87 * batadv_frag_init_chain - check and prepare fragment chain for new fragment
88 * @chain: chain in fragments table to init
89 * @seqno: sequence number of the received fragment
90 *
91 * Make chain ready for a fragment with sequence number "seqno". Delete existing
92 * entries if they have an "old" sequence number.
93 *
94 * Caller must hold chain->lock.
95 *
96 * Returns true if chain is empty and caller can just insert the new fragment
97 * without searching for the right position.
98 */
99static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
100 uint16_t seqno)
101{
102 if (chain->seqno == seqno)
103 return false;
104
105 if (!hlist_empty(&chain->head))
106 batadv_frag_clear_chain(&chain->head);
107
108 chain->size = 0;
109 chain->seqno = seqno;
110
111 return true;
112}
113
114/**
115 * batadv_frag_insert_packet - insert a fragment into a fragment chain
116 * @orig_node: originator that the fragment was received from
117 * @skb: skb to insert
118 * @chain_out: list head to attach complete chains of fragments to
119 *
120 * Insert a new fragment into the reverse ordered chain in the right table
121 * entry. The hash table entry is cleared if "old" fragments exist in it.
122 *
123 * Returns true if skb is buffered, false on error. If the chain has all the
124 * fragments needed to merge the packet, the chain is moved to the passed head
125 * to avoid locking the chain in the table.
126 */
127static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
128 struct sk_buff *skb,
129 struct hlist_head *chain_out)
130{
131 struct batadv_frag_table_entry *chain;
132 struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr;
133 struct batadv_frag_packet *frag_packet;
134 uint8_t bucket;
135 uint16_t seqno, hdr_size = sizeof(struct batadv_frag_packet);
136 bool ret = false;
137
138 /* Linearize packet to avoid linearizing 16 packets in a row when doing
139 * the later merge. Non-linear merge should be added to remove this
140 * linearization.
141 */
142 if (skb_linearize(skb) < 0)
143 goto err;
144
145 frag_packet = (struct batadv_frag_packet *)skb->data;
146 seqno = ntohs(frag_packet->seqno);
147 bucket = seqno % BATADV_FRAG_BUFFER_COUNT;
148
149 frag_entry_new = kmalloc(sizeof(*frag_entry_new), GFP_ATOMIC);
150 if (!frag_entry_new)
151 goto err;
152
153 frag_entry_new->skb = skb;
154 frag_entry_new->no = frag_packet->no;
155
156 /* Select entry in the "chain table" and delete any prior fragments
 157 * with another sequence number. batadv_frag_init_chain() returns true
 158 * if the chain is empty when it returns.
159 */
160 chain = &orig_node->fragments[bucket];
161 spin_lock_bh(&chain->lock);
162 if (batadv_frag_init_chain(chain, seqno)) {
163 hlist_add_head(&frag_entry_new->list, &chain->head);
164 chain->size = skb->len - hdr_size;
165 chain->timestamp = jiffies;
166 ret = true;
167 goto out;
168 }
169
170 /* Find the position for the new fragment. */
171 hlist_for_each_entry(frag_entry_curr, &chain->head, list) {
172 /* Drop packet if fragment already exists. */
173 if (frag_entry_curr->no == frag_entry_new->no)
174 goto err_unlock;
175
176 /* Order fragments from highest to lowest. */
177 if (frag_entry_curr->no < frag_entry_new->no) {
178 hlist_add_before(&frag_entry_new->list,
179 &frag_entry_curr->list);
180 chain->size += skb->len - hdr_size;
181 chain->timestamp = jiffies;
182 ret = true;
183 goto out;
184 }
185 }
186
187 /* Reached the end of the list, so insert after 'frag_entry_curr'. */
188 if (likely(frag_entry_curr)) {
189 hlist_add_after(&frag_entry_curr->list, &frag_entry_new->list);
190 chain->size += skb->len - hdr_size;
191 chain->timestamp = jiffies;
192 ret = true;
193 }
194
195out:
196 if (chain->size > batadv_frag_size_limit() ||
197 ntohs(frag_packet->total_size) > batadv_frag_size_limit()) {
198 /* Clear chain if total size of either the list or the packet
199 * exceeds the maximum size of one merged packet.
200 */
201 batadv_frag_clear_chain(&chain->head);
202 chain->size = 0;
203 } else if (ntohs(frag_packet->total_size) == chain->size) {
204 /* All fragments received. Hand over chain to caller. */
205 hlist_move_list(&chain->head, chain_out);
206 chain->size = 0;
207 }
208
209err_unlock:
210 spin_unlock_bh(&chain->lock);
211
212err:
213 if (!ret)
214 kfree(frag_entry_new);
215
216 return ret;
217}
218
219/**
220 * batadv_frag_merge_packets - merge a chain of fragments
221 * @chain: head of chain with fragments
 222 * @skb: fragment whose header holds the total size of the merged packet
223 *
224 * Expand the first skb in the chain and copy the content of the remaining
225 * skb's into the expanded one. After doing so, clear the chain.
226 *
227 * Returns the merged skb or NULL on error.
228 */
229static struct sk_buff *
230batadv_frag_merge_packets(struct hlist_head *chain, struct sk_buff *skb)
231{
232 struct batadv_frag_packet *packet;
233 struct batadv_frag_list_entry *entry;
234 struct sk_buff *skb_out = NULL;
235 int size, hdr_size = sizeof(struct batadv_frag_packet);
236
237 /* Make sure incoming skb has non-bogus data. */
238 packet = (struct batadv_frag_packet *)skb->data;
239 size = ntohs(packet->total_size);
240 if (size > batadv_frag_size_limit())
241 goto free;
242
243 /* Remove first entry, as this is the destination for the rest of the
244 * fragments.
245 */
246 entry = hlist_entry(chain->first, struct batadv_frag_list_entry, list);
247 hlist_del(&entry->list);
248 skb_out = entry->skb;
249 kfree(entry);
250
251 /* Make room for the rest of the fragments. */
252 if (pskb_expand_head(skb_out, 0, size - skb->len, GFP_ATOMIC) < 0) {
253 kfree_skb(skb_out);
254 skb_out = NULL;
255 goto free;
256 }
257
 258	/* Move the existing MAC header to just before the payload. (Overwrite
259 * the fragment header.)
260 */
261 skb_pull_rcsum(skb_out, hdr_size);
262 memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
263 skb_set_mac_header(skb_out, -ETH_HLEN);
264 skb_reset_network_header(skb_out);
265 skb_reset_transport_header(skb_out);
266
 267	/* Copy the payload of each fragment into the last skb */
268 hlist_for_each_entry(entry, chain, list) {
269 size = entry->skb->len - hdr_size;
270 memcpy(skb_put(skb_out, size), entry->skb->data + hdr_size,
271 size);
272 }
273
274free:
275 /* Locking is not needed, because 'chain' is not part of any orig. */
276 batadv_frag_clear_chain(chain);
277 return skb_out;
278}
279
280/**
281 * batadv_frag_skb_buffer - buffer fragment for later merge
282 * @skb: skb to buffer
283 * @orig_node_src: originator that the skb is received from
284 *
285 * Add fragment to buffer and merge fragments if possible.
286 *
287 * There are three possible outcomes: 1) Packet is merged: Return true and
288 * set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
289 * to NULL; 3) Error: Return false and leave skb as is.
290 */
291bool batadv_frag_skb_buffer(struct sk_buff **skb,
292 struct batadv_orig_node *orig_node_src)
293{
294 struct sk_buff *skb_out = NULL;
295 struct hlist_head head = HLIST_HEAD_INIT;
296 bool ret = false;
297
298 /* Add packet to buffer and table entry if merge is possible. */
299 if (!batadv_frag_insert_packet(orig_node_src, *skb, &head))
300 goto out_err;
301
302 /* Leave if more fragments are needed to merge. */
303 if (hlist_empty(&head))
304 goto out;
305
306 skb_out = batadv_frag_merge_packets(&head, *skb);
307 if (!skb_out)
308 goto out_err;
309
310out:
311 *skb = skb_out;
312 ret = true;
313out_err:
314 return ret;
315}
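The comment above spells out the calling convention: true with *skb set means "merged, deliver it", true with *skb cleared means "buffered, wait for more", and false means the fragment was not consumed. A toy userspace sketch of how a caller distinguishes the three outcomes (the simplified char ** signature and names are invented for illustration; the real caller is batadv_recv_frag_packet() in routing.c below):

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* Stand-in for batadv_frag_skb_buffer(): consumes the packet on
 * success and clears *pkt when it was only buffered.
 */
static bool toy_frag_buffer(char **pkt, bool chain_complete)
{
	if (!*pkt)
		return false;		/* error: fragment not consumed */
	if (!chain_complete) {
		*pkt = NULL;		/* buffered for a later merge */
		return true;
	}
	return true;			/* "merged": *pkt stays valid */
}

static void toy_handle(char **pkt, bool chain_complete)
{
	if (!toy_frag_buffer(pkt, chain_complete))
		puts("error: drop fragment");
	else if (!*pkt)
		puts("buffered: wait for more fragments");
	else
		puts("merged: deliver packet");
}

int main(void)
{
	char payload[] = "fragment";
	char *pkt = payload;

	toy_handle(&pkt, false);	/* buffered */
	pkt = payload;
	toy_handle(&pkt, true);		/* merged */
	return 0;
}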
316
317/**
318 * batadv_frag_skb_fwd - forward fragments that would exceed MTU when merged
319 * @skb: skb to forward
320 * @recv_if: interface that the skb is received on
321 * @orig_node_src: originator that the skb is received from
322 *
323 * Look up the next-hop of the fragments payload and check if the merged packet
324 * will exceed the MTU towards the next-hop. If so, the fragment is forwarded
325 * without merging it.
326 *
327 * Returns true if the fragment is consumed/forwarded, false otherwise.
328 */
329bool batadv_frag_skb_fwd(struct sk_buff *skb,
330 struct batadv_hard_iface *recv_if,
331 struct batadv_orig_node *orig_node_src)
332{
333 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
334 struct batadv_orig_node *orig_node_dst = NULL;
335 struct batadv_neigh_node *neigh_node = NULL;
336 struct batadv_frag_packet *packet;
337 uint16_t total_size;
338 bool ret = false;
339
340 packet = (struct batadv_frag_packet *)skb->data;
341 orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
342 if (!orig_node_dst)
343 goto out;
344
345 neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
346 if (!neigh_node)
347 goto out;
348
349 /* Forward the fragment, if the merged packet would be too big to
350 * be assembled.
351 */
352 total_size = ntohs(packet->total_size);
353 if (total_size > neigh_node->if_incoming->net_dev->mtu) {
354 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_FWD);
355 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES,
356 skb->len + ETH_HLEN);
357
358 packet->header.ttl--;
359 batadv_send_skb_packet(skb, neigh_node->if_incoming,
360 neigh_node->addr);
361 ret = true;
362 }
363
364out:
365 if (orig_node_dst)
366 batadv_orig_node_free_ref(orig_node_dst);
367 if (neigh_node)
368 batadv_neigh_node_free_ref(neigh_node);
369 return ret;
370}
371
372/**
373 * batadv_frag_create - create a fragment from skb
374 * @skb: skb to create fragment from
375 * @frag_head: header to use in new fragment
376 * @mtu: size of new fragment
377 *
378 * Split the passed skb into two fragments: A new one with size matching the
379 * passed mtu and the old one with the rest. The new skb contains data from the
380 * tail of the old skb.
381 *
382 * Returns the new fragment, NULL on error.
383 */
384static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
385 struct batadv_frag_packet *frag_head,
386 unsigned int mtu)
387{
388 struct sk_buff *skb_fragment;
389 unsigned header_size = sizeof(*frag_head);
390 unsigned fragment_size = mtu - header_size;
391
392 skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
393 if (!skb_fragment)
394 goto err;
395
396 skb->priority = TC_PRIO_CONTROL;
397
398 /* Eat the last mtu-bytes of the skb */
399 skb_reserve(skb_fragment, header_size + ETH_HLEN);
400 skb_split(skb, skb_fragment, skb->len - fragment_size);
401
402 /* Add the header */
403 skb_push(skb_fragment, header_size);
404 memcpy(skb_fragment->data, frag_head, header_size);
405
406err:
407 return skb_fragment;
408}
409
410/**
411 * batadv_frag_send_packet - create up to 16 fragments from the passed skb
412 * @skb: skb to create fragments from
413 * @orig_node: final destination of the created fragments
414 * @neigh_node: next-hop of the created fragments
415 *
416 * Returns true on success, false otherwise.
417 */
418bool batadv_frag_send_packet(struct sk_buff *skb,
419 struct batadv_orig_node *orig_node,
420 struct batadv_neigh_node *neigh_node)
421{
422 struct batadv_priv *bat_priv;
423 struct batadv_hard_iface *primary_if;
424 struct batadv_frag_packet frag_header;
425 struct sk_buff *skb_fragment;
426 unsigned mtu = neigh_node->if_incoming->net_dev->mtu;
427 unsigned header_size = sizeof(frag_header);
428 unsigned max_fragment_size, max_packet_size;
429
430 /* To avoid merge and refragmentation at next-hops we never send
431 * fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
432 */
433 mtu = min_t(unsigned, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
434 max_fragment_size = (mtu - header_size - ETH_HLEN);
435 max_packet_size = max_fragment_size * BATADV_FRAG_MAX_FRAGMENTS;
436
437 /* Don't even try to fragment, if we need more than 16 fragments */
438 if (skb->len > max_packet_size)
439 goto out_err;
440
441 bat_priv = orig_node->bat_priv;
442 primary_if = batadv_primary_if_get_selected(bat_priv);
443 if (!primary_if)
444 goto out_err;
445
446 /* Create one header to be copied to all fragments */
447 frag_header.header.packet_type = BATADV_UNICAST_FRAG;
448 frag_header.header.version = BATADV_COMPAT_VERSION;
449 frag_header.header.ttl = BATADV_TTL;
450 frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno));
451 frag_header.reserved = 0;
452 frag_header.no = 0;
453 frag_header.total_size = htons(skb->len);
454 memcpy(frag_header.orig, primary_if->net_dev->dev_addr, ETH_ALEN);
455 memcpy(frag_header.dest, orig_node->orig, ETH_ALEN);
456
457 /* Eat and send fragments from the tail of skb */
458 while (skb->len > max_fragment_size) {
459 skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
460 if (!skb_fragment)
461 goto out_err;
462
463 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
464 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
465 skb_fragment->len + ETH_HLEN);
466 batadv_send_skb_packet(skb_fragment, neigh_node->if_incoming,
467 neigh_node->addr);
468 frag_header.no++;
469
470 /* The initial check in this function should cover this case */
471 if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)
472 goto out_err;
473 }
474
475 /* Make room for the fragment header. */
476 if (batadv_skb_head_push(skb, header_size) < 0 ||
477 pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0)
478 goto out_err;
479
480 memcpy(skb->data, &frag_header, header_size);
481
482 /* Send the last fragment */
483 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
484 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
485 skb->len + ETH_HLEN);
486 batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
487
488 return true;
489out_err:
490 return false;
491}
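batadv_frag_send_packet() derives its limits from the next-hop MTU: the MTU is clamped to BATADV_FRAG_MAX_FRAG_SIZE, the fragment and Ethernet headers are subtracted to get the payload per fragment, and sixteen such fragments bound the skb that may be fragmented at all. A hedged worked example for a 1500-byte next-hop MTU, assuming a 20-byte fragment header and the usual 14-byte ETH_HLEN:

	mtu               = min(1500, 1400)  = 1400
	max_fragment_size = 1400 - 20 - 14   = 1366
	max_packet_size   = 1366 * 16        = 21856

so an skb larger than roughly 21.8 kB is rejected before any fragment is created.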
diff --git a/net/batman-adv/fragmentation.h b/net/batman-adv/fragmentation.h
new file mode 100644
index 000000000000..ca029e2676e7
--- /dev/null
+++ b/net/batman-adv/fragmentation.h
@@ -0,0 +1,50 @@
1/* Copyright (C) 2013 B.A.T.M.A.N. contributors:
2 *
3 * Martin Hundebøll <martin@hundeboll.net>
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#ifndef _NET_BATMAN_ADV_FRAGMENTATION_H_
21#define _NET_BATMAN_ADV_FRAGMENTATION_H_
22
23void batadv_frag_purge_orig(struct batadv_orig_node *orig,
24 bool (*check_cb)(struct batadv_frag_table_entry *));
25bool batadv_frag_skb_fwd(struct sk_buff *skb,
26 struct batadv_hard_iface *recv_if,
27 struct batadv_orig_node *orig_node_src);
28bool batadv_frag_skb_buffer(struct sk_buff **skb,
29 struct batadv_orig_node *orig_node);
30bool batadv_frag_send_packet(struct sk_buff *skb,
31 struct batadv_orig_node *orig_node,
32 struct batadv_neigh_node *neigh_node);
33
34/**
35 * batadv_frag_check_entry - check if a list of fragments has timed out
36 * @frags_entry: table entry to check
37 *
38 * Returns true if the frags entry has timed out, false otherwise.
39 */
40static inline bool
41batadv_frag_check_entry(struct batadv_frag_table_entry *frags_entry)
42{
43 if (!hlist_empty(&frags_entry->head) &&
44 batadv_has_timed_out(frags_entry->timestamp, BATADV_FRAG_TIMEOUT))
45 return true;
46 else
47 return false;
48}
49
50#endif /* _NET_BATMAN_ADV_FRAGMENTATION_H_ */
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 1bce63aa5f5f..053bb318c7a7 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -655,24 +655,29 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
655 struct iphdr *iphdr; 655 struct iphdr *iphdr;
656 struct ipv6hdr *ipv6hdr; 656 struct ipv6hdr *ipv6hdr;
657 struct udphdr *udphdr; 657 struct udphdr *udphdr;
658 struct vlan_ethhdr *vhdr;
659 __be16 proto;
658 660
659 /* check for ethernet header */ 661 /* check for ethernet header */
660 if (!pskb_may_pull(skb, *header_len + ETH_HLEN)) 662 if (!pskb_may_pull(skb, *header_len + ETH_HLEN))
661 return false; 663 return false;
662 ethhdr = (struct ethhdr *)skb->data; 664 ethhdr = (struct ethhdr *)skb->data;
665 proto = ethhdr->h_proto;
663 *header_len += ETH_HLEN; 666 *header_len += ETH_HLEN;
664 667
665 /* check for initial vlan header */ 668 /* check for initial vlan header */
666 if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { 669 if (proto == htons(ETH_P_8021Q)) {
667 if (!pskb_may_pull(skb, *header_len + VLAN_HLEN)) 670 if (!pskb_may_pull(skb, *header_len + VLAN_HLEN))
668 return false; 671 return false;
669 ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN); 672
673 vhdr = (struct vlan_ethhdr *)skb->data;
674 proto = vhdr->h_vlan_encapsulated_proto;
670 *header_len += VLAN_HLEN; 675 *header_len += VLAN_HLEN;
671 } 676 }
672 677
673 /* check for ip header */ 678 /* check for ip header */
674 switch (ntohs(ethhdr->h_proto)) { 679 switch (proto) {
675 case ETH_P_IP: 680 case htons(ETH_P_IP):
676 if (!pskb_may_pull(skb, *header_len + sizeof(*iphdr))) 681 if (!pskb_may_pull(skb, *header_len + sizeof(*iphdr)))
677 return false; 682 return false;
678 iphdr = (struct iphdr *)(skb->data + *header_len); 683 iphdr = (struct iphdr *)(skb->data + *header_len);
@@ -683,7 +688,7 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
683 return false; 688 return false;
684 689
685 break; 690 break;
686 case ETH_P_IPV6: 691 case htons(ETH_P_IPV6):
687 if (!pskb_may_pull(skb, *header_len + sizeof(*ipv6hdr))) 692 if (!pskb_may_pull(skb, *header_len + sizeof(*ipv6hdr)))
688 return false; 693 return false;
689 ipv6hdr = (struct ipv6hdr *)(skb->data + *header_len); 694 ipv6hdr = (struct ipv6hdr *)(skb->data + *header_len);
@@ -710,12 +715,12 @@ bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
710 *header_len += sizeof(*udphdr); 715 *header_len += sizeof(*udphdr);
711 716
712 /* check for bootp port */ 717 /* check for bootp port */
713 if ((ntohs(ethhdr->h_proto) == ETH_P_IP) && 718 if ((proto == htons(ETH_P_IP)) &&
714 (ntohs(udphdr->dest) != 67)) 719 (udphdr->dest != htons(67)))
715 return false; 720 return false;
716 721
717 if ((ntohs(ethhdr->h_proto) == ETH_P_IPV6) && 722 if ((proto == htons(ETH_P_IPV6)) &&
718 (ntohs(udphdr->dest) != 547)) 723 (udphdr->dest != htons(547)))
719 return false; 724 return false;
720 725
721 return true; 726 return true;
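The gateway hunks apply the same byte-order trick inside a switch: proto stays __be16 and the case labels carry the swap, which works on the kernel side because htons() of a constant folds to a constant expression (__constant_htons). A userspace sketch of the idea; glibc's htons() is not guaranteed to be usable as a case label, so the sketch swaps the constants by hand (little-endian branch assumed for the swap):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define ETH_P_IP   0x0800
#define ETH_P_IPV6 0x86DD

/* Compile-time byte swap usable as a case label. */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define CONST_HTONS(x) ((uint16_t)((((x) & 0xff) << 8) | (((x) >> 8) & 0xff)))
#else
#define CONST_HTONS(x) ((uint16_t)(x))
#endif

int main(void)
{
	uint16_t proto = htons(ETH_P_IP);	/* as read from the frame */

	switch (proto) {
	case CONST_HTONS(ETH_P_IP):
		puts("IPv4");
		break;
	case CONST_HTONS(ETH_P_IPV6):
		puts("IPv6");
		break;
	default:
		puts("other");
	}
	return 0;
}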
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index eeb667112d64..d564af295db4 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -269,9 +269,10 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
269 const struct batadv_priv *bat_priv = netdev_priv(soft_iface); 269 const struct batadv_priv *bat_priv = netdev_priv(soft_iface);
270 const struct batadv_hard_iface *hard_iface; 270 const struct batadv_hard_iface *hard_iface;
271 /* allow big frames if all devices are capable to do so 271 /* allow big frames if all devices are capable to do so
272 * (have MTU > 1500 + BAT_HEADER_LEN) 272 * (have MTU > 1500 + batadv_max_header_len())
273 */ 273 */
274 int min_mtu = ETH_DATA_LEN; 274 int min_mtu = ETH_DATA_LEN;
275 int max_header_len = batadv_max_header_len();
275 276
276 if (atomic_read(&bat_priv->fragmentation)) 277 if (atomic_read(&bat_priv->fragmentation))
277 goto out; 278 goto out;
@@ -285,8 +286,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
285 if (hard_iface->soft_iface != soft_iface) 286 if (hard_iface->soft_iface != soft_iface)
286 continue; 287 continue;
287 288
288 min_mtu = min_t(int, 289 min_mtu = min_t(int, hard_iface->net_dev->mtu - max_header_len,
289 hard_iface->net_dev->mtu - BATADV_HEADER_LEN,
290 min_mtu); 290 min_mtu);
291 } 291 }
292 rcu_read_unlock(); 292 rcu_read_unlock();
@@ -379,7 +379,8 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
379{ 379{
380 struct batadv_priv *bat_priv; 380 struct batadv_priv *bat_priv;
381 struct net_device *soft_iface, *master; 381 struct net_device *soft_iface, *master;
382 __be16 ethertype = __constant_htons(ETH_P_BATMAN); 382 __be16 ethertype = htons(ETH_P_BATMAN);
383 int max_header_len = batadv_max_header_len();
383 int ret; 384 int ret;
384 385
385 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) 386 if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
@@ -444,23 +445,22 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
444 hard_iface->batman_adv_ptype.dev = hard_iface->net_dev; 445 hard_iface->batman_adv_ptype.dev = hard_iface->net_dev;
445 dev_add_pack(&hard_iface->batman_adv_ptype); 446 dev_add_pack(&hard_iface->batman_adv_ptype);
446 447
447 atomic_set(&hard_iface->frag_seqno, 1);
448 batadv_info(hard_iface->soft_iface, "Adding interface: %s\n", 448 batadv_info(hard_iface->soft_iface, "Adding interface: %s\n",
449 hard_iface->net_dev->name); 449 hard_iface->net_dev->name);
450 450
451 if (atomic_read(&bat_priv->fragmentation) && 451 if (atomic_read(&bat_priv->fragmentation) &&
452 hard_iface->net_dev->mtu < ETH_DATA_LEN + BATADV_HEADER_LEN) 452 hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
453 batadv_info(hard_iface->soft_iface, 453 batadv_info(hard_iface->soft_iface,
454 "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %zi would solve the problem.\n", 454 "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. Packets going over this interface will be fragmented on layer2 which could impact the performance. Setting the MTU to %i would solve the problem.\n",
455 hard_iface->net_dev->name, hard_iface->net_dev->mtu, 455 hard_iface->net_dev->name, hard_iface->net_dev->mtu,
456 ETH_DATA_LEN + BATADV_HEADER_LEN); 456 ETH_DATA_LEN + max_header_len);
457 457
458 if (!atomic_read(&bat_priv->fragmentation) && 458 if (!atomic_read(&bat_priv->fragmentation) &&
459 hard_iface->net_dev->mtu < ETH_DATA_LEN + BATADV_HEADER_LEN) 459 hard_iface->net_dev->mtu < ETH_DATA_LEN + max_header_len)
460 batadv_info(hard_iface->soft_iface, 460 batadv_info(hard_iface->soft_iface,
461 "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %zi.\n", 461 "The MTU of interface %s is too small (%i) to handle the transport of batman-adv packets. If you experience problems getting traffic through try increasing the MTU to %i.\n",
462 hard_iface->net_dev->name, hard_iface->net_dev->mtu, 462 hard_iface->net_dev->name, hard_iface->net_dev->mtu,
463 ETH_DATA_LEN + BATADV_HEADER_LEN); 463 ETH_DATA_LEN + max_header_len);
464 464
465 if (batadv_hardif_is_iface_up(hard_iface)) 465 if (batadv_hardif_is_iface_up(hard_iface))
466 batadv_hardif_activate_interface(hard_iface); 466 batadv_hardif_activate_interface(hard_iface);
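With BATADV_HEADER_LEN gone, the MTU checks above use batadv_max_header_len() (added in main.c further down), so the reserved space automatically tracks the largest compiled-in header. A hedged worked example, assuming the 18-byte unicast_4addr header is the largest one when network coding is disabled (with CONFIG_BATMAN_ADV_NC the coded-packet header is larger and the numbers shift accordingly):

	min_mtu on a 1500-byte hard interface   = 1500 - 18 = 1482
	warning threshold for the interface MTU = ETH_DATA_LEN + 18 = 1518

i.e. an underlying device needs an MTU of at least about 1518 bytes to carry full 1500-byte payloads without triggering the fragmentation warning.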
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 5a99bb4b6b82..82ac6472fa6f 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -192,25 +192,25 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
192 goto free_skb; 192 goto free_skb;
193 } 193 }
194 194
195 if (icmp_packet->header.packet_type != BATADV_ICMP) { 195 if (icmp_packet->icmph.header.packet_type != BATADV_ICMP) {
196 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 196 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
197 "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n"); 197 "Error - can't send packet from char device: got bogus packet type (expected: BAT_ICMP)\n");
198 len = -EINVAL; 198 len = -EINVAL;
199 goto free_skb; 199 goto free_skb;
200 } 200 }
201 201
202 if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { 202 if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
203 batadv_dbg(BATADV_DBG_BATMAN, bat_priv, 203 batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
204 "Error - can't send packet from char device: got bogus message type (expected: ECHO_REQUEST)\n"); 204 "Error - can't send packet from char device: got bogus message type (expected: ECHO_REQUEST)\n");
205 len = -EINVAL; 205 len = -EINVAL;
206 goto free_skb; 206 goto free_skb;
207 } 207 }
208 208
209 icmp_packet->uid = socket_client->index; 209 icmp_packet->icmph.uid = socket_client->index;
210 210
211 if (icmp_packet->header.version != BATADV_COMPAT_VERSION) { 211 if (icmp_packet->icmph.header.version != BATADV_COMPAT_VERSION) {
212 icmp_packet->msg_type = BATADV_PARAMETER_PROBLEM; 212 icmp_packet->icmph.msg_type = BATADV_PARAMETER_PROBLEM;
213 icmp_packet->header.version = BATADV_COMPAT_VERSION; 213 icmp_packet->icmph.header.version = BATADV_COMPAT_VERSION;
214 batadv_socket_add_packet(socket_client, icmp_packet, 214 batadv_socket_add_packet(socket_client, icmp_packet,
215 packet_len); 215 packet_len);
216 goto free_skb; 216 goto free_skb;
@@ -219,7 +219,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
219 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) 219 if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
220 goto dst_unreach; 220 goto dst_unreach;
221 221
222 orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->dst); 222 orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst);
223 if (!orig_node) 223 if (!orig_node)
224 goto dst_unreach; 224 goto dst_unreach;
225 225
@@ -233,7 +233,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
233 if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE) 233 if (neigh_node->if_incoming->if_status != BATADV_IF_ACTIVE)
234 goto dst_unreach; 234 goto dst_unreach;
235 235
236 memcpy(icmp_packet->orig, 236 memcpy(icmp_packet->icmph.orig,
237 primary_if->net_dev->dev_addr, ETH_ALEN); 237 primary_if->net_dev->dev_addr, ETH_ALEN);
238 238
239 if (packet_len == sizeof(struct batadv_icmp_packet_rr)) 239 if (packet_len == sizeof(struct batadv_icmp_packet_rr))
@@ -244,7 +244,7 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff,
244 goto out; 244 goto out;
245 245
246dst_unreach: 246dst_unreach:
247 icmp_packet->msg_type = BATADV_DESTINATION_UNREACHABLE; 247 icmp_packet->icmph.msg_type = BATADV_DESTINATION_UNREACHABLE;
248 batadv_socket_add_packet(socket_client, icmp_packet, packet_len); 248 batadv_socket_add_packet(socket_client, icmp_packet, packet_len);
249free_skb: 249free_skb:
250 kfree_skb(skb); 250 kfree_skb(skb);
@@ -318,7 +318,7 @@ static void batadv_socket_add_packet(struct batadv_socket_client *socket_client,
318 /* while waiting for the lock the socket_client could have been 318 /* while waiting for the lock the socket_client could have been
319 * deleted 319 * deleted
320 */ 320 */
321 if (!batadv_socket_client_hash[icmp_packet->uid]) { 321 if (!batadv_socket_client_hash[icmp_packet->icmph.uid]) {
322 spin_unlock_bh(&socket_client->lock); 322 spin_unlock_bh(&socket_client->lock);
323 kfree(socket_packet); 323 kfree(socket_packet);
324 return; 324 return;
@@ -347,7 +347,7 @@ void batadv_socket_receive_packet(struct batadv_icmp_packet_rr *icmp_packet,
347{ 347{
348 struct batadv_socket_client *hash; 348 struct batadv_socket_client *hash;
349 349
350 hash = batadv_socket_client_hash[icmp_packet->uid]; 350 hash = batadv_socket_client_hash[icmp_packet->icmph.uid];
351 if (hash) 351 if (hash)
352 batadv_socket_add_packet(hash, icmp_packet, icmp_len); 352 batadv_socket_add_packet(hash, icmp_packet, icmp_len);
353} 353}
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 8b195e63e70e..7f3a5c426615 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -36,11 +36,11 @@
36#include "gateway_client.h" 36#include "gateway_client.h"
37#include "bridge_loop_avoidance.h" 37#include "bridge_loop_avoidance.h"
38#include "distributed-arp-table.h" 38#include "distributed-arp-table.h"
39#include "unicast.h"
40#include "gateway_common.h" 39#include "gateway_common.h"
41#include "hash.h" 40#include "hash.h"
42#include "bat_algo.h" 41#include "bat_algo.h"
43#include "network-coding.h" 42#include "network-coding.h"
43#include "fragmentation.h"
44 44
45 45
46/* List manipulations on hardif_list have to be rtnl_lock()'ed, 46/* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -256,6 +256,31 @@ out:
256} 256}
257 257
258/** 258/**
259 * batadv_max_header_len - calculate maximum encapsulation overhead for a
260 * payload packet
261 *
 262 * Returns the maximum encapsulation overhead in bytes.
263 */
264int batadv_max_header_len(void)
265{
266 int header_len = 0;
267
268 header_len = max_t(int, header_len,
269 sizeof(struct batadv_unicast_packet));
270 header_len = max_t(int, header_len,
271 sizeof(struct batadv_unicast_4addr_packet));
272 header_len = max_t(int, header_len,
273 sizeof(struct batadv_bcast_packet));
274
275#ifdef CONFIG_BATMAN_ADV_NC
276 header_len = max_t(int, header_len,
277 sizeof(struct batadv_coded_packet));
278#endif
279
280 return header_len;
281}
282
283/**
259 * batadv_skb_set_priority - sets skb priority according to packet content 284 * batadv_skb_set_priority - sets skb priority according to packet content
260 * @skb: the packet to be sent 285 * @skb: the packet to be sent
261 * @offset: offset to the packet content 286 * @offset: offset to the packet content
@@ -399,10 +424,10 @@ static void batadv_recv_handler_init(void)
399 /* compile time checks for struct member offsets */ 424 /* compile time checks for struct member offsets */
400 BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10); 425 BUILD_BUG_ON(offsetof(struct batadv_unicast_4addr_packet, src) != 10);
401 BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4); 426 BUILD_BUG_ON(offsetof(struct batadv_unicast_packet, dest) != 4);
402 BUILD_BUG_ON(offsetof(struct batadv_unicast_frag_packet, dest) != 4);
403 BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4); 427 BUILD_BUG_ON(offsetof(struct batadv_unicast_tvlv_packet, dst) != 4);
404 BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, dst) != 4); 428 BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4);
405 BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, dst) != 4); 429 BUILD_BUG_ON(offsetof(struct batadv_icmp_packet, icmph.dst) != 4);
430 BUILD_BUG_ON(offsetof(struct batadv_icmp_packet_rr, icmph.dst) != 4);
406 431
407 /* broadcast packet */ 432 /* broadcast packet */
408 batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet; 433 batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
@@ -412,12 +437,12 @@ static void batadv_recv_handler_init(void)
412 batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet; 437 batadv_rx_handler[BATADV_UNICAST_4ADDR] = batadv_recv_unicast_packet;
413 /* unicast packet */ 438 /* unicast packet */
414 batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet; 439 batadv_rx_handler[BATADV_UNICAST] = batadv_recv_unicast_packet;
415 /* fragmented unicast packet */
416 batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_ucast_frag_packet;
417 /* unicast tvlv packet */ 440 /* unicast tvlv packet */
418 batadv_rx_handler[BATADV_UNICAST_TVLV] = batadv_recv_unicast_tvlv; 441 batadv_rx_handler[BATADV_UNICAST_TVLV] = batadv_recv_unicast_tvlv;
419 /* batman icmp packet */ 442 /* batman icmp packet */
420 batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet; 443 batadv_rx_handler[BATADV_ICMP] = batadv_recv_icmp_packet;
444 /* Fragmented packets */
445 batadv_rx_handler[BATADV_UNICAST_FRAG] = batadv_recv_frag_packet;
421} 446}
422 447
423int 448int
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index e11c2ec7a739..54c13d51edbe 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -131,6 +131,15 @@ enum batadv_uev_type {
131 131
132#define BATADV_GW_THRESHOLD 50 132#define BATADV_GW_THRESHOLD 50
133 133
134/* Number of fragment chains for each orig_node */
135#define BATADV_FRAG_BUFFER_COUNT 8
136/* Maximum number of fragments for one packet */
137#define BATADV_FRAG_MAX_FRAGMENTS 16
 138/* Maximum size of each fragment */
139#define BATADV_FRAG_MAX_FRAG_SIZE 1400
140/* Time to keep fragments while waiting for rest of the fragments */
141#define BATADV_FRAG_TIMEOUT 10000
142
134#define BATADV_DAT_CANDIDATE_NOT_FOUND 0 143#define BATADV_DAT_CANDIDATE_NOT_FOUND 0
135#define BATADV_DAT_CANDIDATE_ORIG 1 144#define BATADV_DAT_CANDIDATE_ORIG 1
136 145
@@ -182,6 +191,7 @@ void batadv_mesh_free(struct net_device *soft_iface);
182int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr); 191int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr);
183struct batadv_hard_iface * 192struct batadv_hard_iface *
184batadv_seq_print_text_primary_if_get(struct seq_file *seq); 193batadv_seq_print_text_primary_if_get(struct seq_file *seq);
194int batadv_max_header_len(void);
185void batadv_skb_set_priority(struct sk_buff *skb, int offset); 195void batadv_skb_set_priority(struct sk_buff *skb, int offset);
186int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev, 196int batadv_batman_skb_recv(struct sk_buff *skb, struct net_device *dev,
187 struct packet_type *ptype, 197 struct packet_type *ptype,
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 5d53d2f38377..a591dc5c321e 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -25,10 +25,10 @@
25#include "routing.h" 25#include "routing.h"
26#include "gateway_client.h" 26#include "gateway_client.h"
27#include "hard-interface.h" 27#include "hard-interface.h"
28#include "unicast.h"
29#include "soft-interface.h" 28#include "soft-interface.h"
30#include "bridge_loop_avoidance.h" 29#include "bridge_loop_avoidance.h"
31#include "network-coding.h" 30#include "network-coding.h"
31#include "fragmentation.h"
32 32
33/* hash class keys */ 33/* hash class keys */
34static struct lock_class_key batadv_orig_hash_lock_class_key; 34static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -146,7 +146,8 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
146 /* Free nc_nodes */ 146 /* Free nc_nodes */
147 batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL); 147 batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
148 148
149 batadv_frag_list_free(&orig_node->frag_list); 149 batadv_frag_purge_orig(orig_node, NULL);
150
150 batadv_tt_global_del_orig(orig_node->bat_priv, orig_node, 151 batadv_tt_global_del_orig(orig_node->bat_priv, orig_node,
151 "originator timed out"); 152 "originator timed out");
152 153
@@ -217,7 +218,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
217 const uint8_t *addr) 218 const uint8_t *addr)
218{ 219{
219 struct batadv_orig_node *orig_node; 220 struct batadv_orig_node *orig_node;
220 int size; 221 int size, i;
221 int hash_added; 222 int hash_added;
222 unsigned long reset_time; 223 unsigned long reset_time;
223 224
@@ -269,8 +270,11 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
269 size = bat_priv->num_ifaces * sizeof(uint8_t); 270 size = bat_priv->num_ifaces * sizeof(uint8_t);
270 orig_node->bcast_own_sum = kzalloc(size, GFP_ATOMIC); 271 orig_node->bcast_own_sum = kzalloc(size, GFP_ATOMIC);
271 272
272 INIT_LIST_HEAD(&orig_node->frag_list); 273 for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
273 orig_node->last_frag_packet = 0; 274 INIT_HLIST_HEAD(&orig_node->fragments[i].head);
275 spin_lock_init(&orig_node->fragments[i].lock);
276 orig_node->fragments[i].size = 0;
277 }
274 278
275 if (!orig_node->bcast_own_sum) 279 if (!orig_node->bcast_own_sum)
276 goto free_bcast_own; 280 goto free_bcast_own;
@@ -394,9 +398,8 @@ static void _batadv_purge_orig(struct batadv_priv *bat_priv)
394 continue; 398 continue;
395 } 399 }
396 400
397 if (batadv_has_timed_out(orig_node->last_frag_packet, 401 batadv_frag_purge_orig(orig_node,
398 BATADV_FRAG_TIMEOUT)) 402 batadv_frag_check_entry);
399 batadv_frag_list_free(&orig_node->frag_list);
400 } 403 }
401 spin_unlock_bh(list_lock); 404 spin_unlock_bh(list_lock);
402 } 405 }
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 4361bae6186a..65e723ed030b 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -91,12 +91,6 @@ enum batadv_icmp_packettype {
91 BATADV_PARAMETER_PROBLEM = 12, 91 BATADV_PARAMETER_PROBLEM = 12,
92}; 92};
93 93
94/* fragmentation defines */
95enum batadv_unicast_frag_flags {
96 BATADV_UNI_FRAG_HEAD = BIT(0),
97 BATADV_UNI_FRAG_LARGETAIL = BIT(1),
98};
99
100/* tt data subtypes */ 94/* tt data subtypes */
101#define BATADV_TT_DATA_TYPE_MASK 0x0F 95#define BATADV_TT_DATA_TYPE_MASK 0x0F
102 96
@@ -192,29 +186,47 @@ struct batadv_ogm_packet {
192 186
193#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet) 187#define BATADV_OGM_HLEN sizeof(struct batadv_ogm_packet)
194 188
195struct batadv_icmp_packet { 189/**
190 * batadv_icmp_header - common ICMP header
191 * @header: common batman header
192 * @msg_type: ICMP packet type
193 * @dst: address of the destination node
194 * @orig: address of the source node
195 * @uid: local ICMP socket identifier
196 */
197struct batadv_icmp_header {
196 struct batadv_header header; 198 struct batadv_header header;
197 uint8_t msg_type; /* see ICMP message types above */ 199 uint8_t msg_type; /* see ICMP message types above */
198 uint8_t dst[ETH_ALEN]; 200 uint8_t dst[ETH_ALEN];
199 uint8_t orig[ETH_ALEN]; 201 uint8_t orig[ETH_ALEN];
200 __be16 seqno;
201 uint8_t uid; 202 uint8_t uid;
203};
204
205/**
206 * batadv_icmp_packet - ICMP packet
207 * @icmph: common ICMP header
208 * @reserved: not used - useful for alignment
209 * @seqno: ICMP sequence number
210 */
211struct batadv_icmp_packet {
212 struct batadv_icmp_header icmph;
202 uint8_t reserved; 213 uint8_t reserved;
214 __be16 seqno;
203}; 215};
204 216
205#define BATADV_RR_LEN 16 217#define BATADV_RR_LEN 16
206 218
207/* icmp_packet_rr must start with all fields from imcp_packet 219/**
208 * as this is assumed by code that handles ICMP packets 220 * batadv_icmp_packet_rr - ICMP RouteRecord packet
221 * @icmph: common ICMP header
 222 * @rr_cur: number of entries in the rr array
223 * @seqno: ICMP sequence number
224 * @rr: route record array
209 */ 225 */
210struct batadv_icmp_packet_rr { 226struct batadv_icmp_packet_rr {
211 struct batadv_header header; 227 struct batadv_icmp_header icmph;
212 uint8_t msg_type; /* see ICMP message types above */
213 uint8_t dst[ETH_ALEN];
214 uint8_t orig[ETH_ALEN];
215 __be16 seqno;
216 uint8_t uid;
217 uint8_t rr_cur; 228 uint8_t rr_cur;
229 __be16 seqno;
218 uint8_t rr[BATADV_RR_LEN][ETH_ALEN]; 230 uint8_t rr[BATADV_RR_LEN][ETH_ALEN];
219}; 231};
220 232
@@ -255,15 +267,32 @@ struct batadv_unicast_4addr_packet {
255 */ 267 */
256}; 268};
257 269
258struct batadv_unicast_frag_packet { 270/**
259 struct batadv_header header; 271 * struct batadv_frag_packet - fragmented packet
260 uint8_t ttvn; /* destination translation table version number */ 272 * @header: common batman packet header with type, compatversion, and ttl
261 uint8_t dest[ETH_ALEN]; 273 * @dest: final destination used when routing fragments
262 uint8_t flags; 274 * @orig: originator of the fragment used when merging the packet
263 uint8_t align; 275 * @no: fragment number within this sequence
264 uint8_t orig[ETH_ALEN]; 276 * @reserved: reserved byte for alignment
265 __be16 seqno; 277 * @seqno: sequence identification
266} __packed; 278 * @total_size: size of the merged packet
279 */
280struct batadv_frag_packet {
281 struct batadv_header header;
282#if defined(__BIG_ENDIAN_BITFIELD)
283 uint8_t no:4;
284 uint8_t reserved:4;
285#elif defined(__LITTLE_ENDIAN_BITFIELD)
286 uint8_t reserved:4;
287 uint8_t no:4;
288#else
 289#error "unknown bitfield endianness"
290#endif
291 uint8_t dest[ETH_ALEN];
292 uint8_t orig[ETH_ALEN];
293 __be16 seqno;
294 __be16 total_size;
295};
267 296
268struct batadv_bcast_packet { 297struct batadv_bcast_packet {
269 struct batadv_header header; 298 struct batadv_header header;
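struct batadv_frag_packet packs the 4-bit fragment number and 4 reserved bits into a single byte, with the bit order flipped per endianness so the on-wire layout is identical, and the BUILD_BUG_ON() added in main.c checks that dest still starts at offset 4. A userspace mirror of that layout check (field sizes and the packed attribute are assumptions made for the sketch, not copied from the kernel build):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <assert.h>

struct toy_batadv_header {	/* assumed 3-byte common header */
	uint8_t packet_type;
	uint8_t version;
	uint8_t ttl;
} __attribute__((packed));

struct toy_frag_packet {
	struct toy_batadv_header header;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	uint8_t no:4;
	uint8_t reserved:4;
#else
	uint8_t reserved:4;
	uint8_t no:4;
#endif
	uint8_t dest[6];
	uint8_t orig[6];
	uint16_t seqno;		/* __be16 in the kernel */
	uint16_t total_size;	/* __be16 in the kernel */
} __attribute__((packed));

int main(void)
{
	/* mirrors BUILD_BUG_ON(offsetof(struct batadv_frag_packet, dest) != 4) */
	assert(offsetof(struct toy_frag_packet, dest) == 4);
	printf("dest offset %zu, total size %zu bytes\n",
	       offsetof(struct toy_frag_packet, dest),
	       sizeof(struct toy_frag_packet));
	return 0;
}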
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 457dfef9c5fc..3281a504c20a 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -25,10 +25,10 @@
25#include "icmp_socket.h" 25#include "icmp_socket.h"
26#include "translation-table.h" 26#include "translation-table.h"
27#include "originator.h" 27#include "originator.h"
28#include "unicast.h"
29#include "bridge_loop_avoidance.h" 28#include "bridge_loop_avoidance.h"
30#include "distributed-arp-table.h" 29#include "distributed-arp-table.h"
31#include "network-coding.h" 30#include "network-coding.h"
31#include "fragmentation.h"
32 32
33static int batadv_route_unicast_packet(struct sk_buff *skb, 33static int batadv_route_unicast_packet(struct sk_buff *skb,
34 struct batadv_hard_iface *recv_if); 34 struct batadv_hard_iface *recv_if);
@@ -258,7 +258,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
258 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; 258 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
259 259
260 /* add data to device queue */ 260 /* add data to device queue */
261 if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { 261 if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
262 batadv_socket_receive_packet(icmp_packet, icmp_len); 262 batadv_socket_receive_packet(icmp_packet, icmp_len);
263 goto out; 263 goto out;
264 } 264 }
@@ -269,7 +269,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
269 269
270 /* answer echo request (ping) */ 270 /* answer echo request (ping) */
271 /* get routing information */ 271 /* get routing information */
272 orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig); 272 orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig);
273 if (!orig_node) 273 if (!orig_node)
274 goto out; 274 goto out;
275 275
@@ -279,10 +279,11 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv,
279 279
280 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; 280 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
281 281
282 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); 282 memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN);
283 memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN); 283 memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr,
284 icmp_packet->msg_type = BATADV_ECHO_REPLY; 284 ETH_ALEN);
285 icmp_packet->header.ttl = BATADV_TTL; 285 icmp_packet->icmph.msg_type = BATADV_ECHO_REPLY;
286 icmp_packet->icmph.header.ttl = BATADV_TTL;
286 287
287 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) 288 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
288 ret = NET_RX_SUCCESS; 289 ret = NET_RX_SUCCESS;
@@ -306,9 +307,9 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
306 icmp_packet = (struct batadv_icmp_packet *)skb->data; 307 icmp_packet = (struct batadv_icmp_packet *)skb->data;
307 308
308 /* send TTL exceeded if packet is an echo request (traceroute) */ 309 /* send TTL exceeded if packet is an echo request (traceroute) */
309 if (icmp_packet->msg_type != BATADV_ECHO_REQUEST) { 310 if (icmp_packet->icmph.msg_type != BATADV_ECHO_REQUEST) {
310 pr_debug("Warning - can't forward icmp packet from %pM to %pM: ttl exceeded\n", 311 pr_debug("Warning - can't forward icmp packet from %pM to %pM: ttl exceeded\n",
311 icmp_packet->orig, icmp_packet->dst); 312 icmp_packet->icmph.orig, icmp_packet->icmph.dst);
312 goto out; 313 goto out;
313 } 314 }
314 315
@@ -317,7 +318,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
317 goto out; 318 goto out;
318 319
319 /* get routing information */ 320 /* get routing information */
320 orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->orig); 321 orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.orig);
321 if (!orig_node) 322 if (!orig_node)
322 goto out; 323 goto out;
323 324
@@ -327,10 +328,11 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv,
327 328
328 icmp_packet = (struct batadv_icmp_packet *)skb->data; 329 icmp_packet = (struct batadv_icmp_packet *)skb->data;
329 330
330 memcpy(icmp_packet->dst, icmp_packet->orig, ETH_ALEN); 331 memcpy(icmp_packet->icmph.dst, icmp_packet->icmph.orig, ETH_ALEN);
331 memcpy(icmp_packet->orig, primary_if->net_dev->dev_addr, ETH_ALEN); 332 memcpy(icmp_packet->icmph.orig, primary_if->net_dev->dev_addr,
332 icmp_packet->msg_type = BATADV_TTL_EXCEEDED; 333 ETH_ALEN);
333 icmp_packet->header.ttl = BATADV_TTL; 334 icmp_packet->icmph.msg_type = BATADV_TTL_EXCEEDED;
335 icmp_packet->icmph.header.ttl = BATADV_TTL;
334 336
335 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) 337 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
336 ret = NET_RX_SUCCESS; 338 ret = NET_RX_SUCCESS;
@@ -379,7 +381,9 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
379 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; 381 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
380 382
381 /* add record route information if not full */ 383 /* add record route information if not full */
382 if ((hdr_size == sizeof(struct batadv_icmp_packet_rr)) && 384 if ((icmp_packet->icmph.msg_type == BATADV_ECHO_REPLY ||
385 icmp_packet->icmph.msg_type == BATADV_ECHO_REQUEST) &&
386 (hdr_size == sizeof(struct batadv_icmp_packet_rr)) &&
383 (icmp_packet->rr_cur < BATADV_RR_LEN)) { 387 (icmp_packet->rr_cur < BATADV_RR_LEN)) {
384 memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]), 388 memcpy(&(icmp_packet->rr[icmp_packet->rr_cur]),
385 ethhdr->h_dest, ETH_ALEN); 389 ethhdr->h_dest, ETH_ALEN);
@@ -387,15 +391,15 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
387 } 391 }
388 392
389 /* packet for me */ 393 /* packet for me */
390 if (batadv_is_my_mac(bat_priv, icmp_packet->dst)) 394 if (batadv_is_my_mac(bat_priv, icmp_packet->icmph.dst))
391 return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size); 395 return batadv_recv_my_icmp_packet(bat_priv, skb, hdr_size);
392 396
393 /* TTL exceeded */ 397 /* TTL exceeded */
394 if (icmp_packet->header.ttl < 2) 398 if (icmp_packet->icmph.header.ttl < 2)
395 return batadv_recv_icmp_ttl_exceeded(bat_priv, skb); 399 return batadv_recv_icmp_ttl_exceeded(bat_priv, skb);
396 400
397 /* get routing information */ 401 /* get routing information */
398 orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->dst); 402 orig_node = batadv_orig_hash_find(bat_priv, icmp_packet->icmph.dst);
399 if (!orig_node) 403 if (!orig_node)
400 goto out; 404 goto out;
401 405
@@ -406,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
406 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data; 410 icmp_packet = (struct batadv_icmp_packet_rr *)skb->data;
407 411
408 /* decrement ttl */ 412 /* decrement ttl */
409 icmp_packet->header.ttl--; 413 icmp_packet->icmph.header.ttl--;
410 414
411 /* route it */ 415 /* route it */
412 if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) 416 if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP)
@@ -651,11 +655,9 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
651{ 655{
652 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); 656 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
653 struct batadv_orig_node *orig_node = NULL; 657 struct batadv_orig_node *orig_node = NULL;
654 struct batadv_neigh_node *neigh_node = NULL;
655 struct batadv_unicast_packet *unicast_packet; 658 struct batadv_unicast_packet *unicast_packet;
656 struct ethhdr *ethhdr = eth_hdr(skb); 659 struct ethhdr *ethhdr = eth_hdr(skb);
657 int res, hdr_len, ret = NET_RX_DROP; 660 int res, hdr_len, ret = NET_RX_DROP;
658 struct sk_buff *new_skb;
659 661
660 unicast_packet = (struct batadv_unicast_packet *)skb->data; 662 unicast_packet = (struct batadv_unicast_packet *)skb->data;
661 663
@@ -672,46 +674,12 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
672 if (!orig_node) 674 if (!orig_node)
673 goto out; 675 goto out;
674 676
675 /* find_router() increases neigh_nodes refcount if found. */
676 neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
677
678 if (!neigh_node)
679 goto out;
680
681 /* create a copy of the skb, if needed, to modify it. */ 677 /* create a copy of the skb, if needed, to modify it. */
682 if (skb_cow(skb, ETH_HLEN) < 0) 678 if (skb_cow(skb, ETH_HLEN) < 0)
683 goto out; 679 goto out;
684 680
685 unicast_packet = (struct batadv_unicast_packet *)skb->data;
686
687 if (unicast_packet->header.packet_type == BATADV_UNICAST &&
688 atomic_read(&bat_priv->fragmentation) &&
689 skb->len > neigh_node->if_incoming->net_dev->mtu) {
690 ret = batadv_frag_send_skb(skb, bat_priv,
691 neigh_node->if_incoming,
692 neigh_node->addr);
693 goto out;
694 }
695
696 if (unicast_packet->header.packet_type == BATADV_UNICAST_FRAG &&
697 batadv_frag_can_reassemble(skb,
698 neigh_node->if_incoming->net_dev->mtu)) {
699 ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb);
700
701 if (ret == NET_RX_DROP)
702 goto out;
703
704 /* packet was buffered for late merge */
705 if (!new_skb) {
706 ret = NET_RX_SUCCESS;
707 goto out;
708 }
709
710 skb = new_skb;
711 unicast_packet = (struct batadv_unicast_packet *)skb->data;
712 }
713
714 /* decrement ttl */ 681 /* decrement ttl */
682 unicast_packet = (struct batadv_unicast_packet *)skb->data;
715 unicast_packet->header.ttl--; 683 unicast_packet->header.ttl--;
716 684
717 switch (unicast_packet->header.packet_type) { 685 switch (unicast_packet->header.packet_type) {
@@ -746,8 +714,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,
746 } 714 }
747 715
748out: 716out:
749 if (neigh_node)
750 batadv_neigh_node_free_ref(neigh_node);
751 if (orig_node) 717 if (orig_node)
752 batadv_orig_node_free_ref(orig_node); 718 batadv_orig_node_free_ref(orig_node);
753 return ret; 719 return ret;
@@ -1001,51 +967,6 @@ rx_success:
1001 return batadv_route_unicast_packet(skb, recv_if); 967 return batadv_route_unicast_packet(skb, recv_if);
1002} 968}
1003 969
1004int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
1005 struct batadv_hard_iface *recv_if)
1006{
1007 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1008 struct batadv_unicast_frag_packet *unicast_packet;
1009 int hdr_size = sizeof(*unicast_packet);
1010 struct sk_buff *new_skb = NULL;
1011 int ret;
1012
1013 if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
1014 return NET_RX_DROP;
1015
1016 if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
1017 return NET_RX_DROP;
1018
1019 unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
1020
1021 /* packet for me */
1022 if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) {
1023 ret = batadv_frag_reassemble_skb(skb, bat_priv, &new_skb);
1024
1025 if (ret == NET_RX_DROP)
1026 return NET_RX_DROP;
1027
1028 /* packet was buffered for late merge */
1029 if (!new_skb)
1030 return NET_RX_SUCCESS;
1031
1032 if (batadv_dat_snoop_incoming_arp_request(bat_priv, new_skb,
1033 hdr_size))
1034 goto rx_success;
1035 if (batadv_dat_snoop_incoming_arp_reply(bat_priv, new_skb,
1036 hdr_size))
1037 goto rx_success;
1038
1039 batadv_interface_rx(recv_if->soft_iface, new_skb, recv_if,
1040 sizeof(struct batadv_unicast_packet), NULL);
1041
1042rx_success:
1043 return NET_RX_SUCCESS;
1044 }
1045
1046 return batadv_route_unicast_packet(skb, recv_if);
1047}
1048
1049/** 970/**
1050 * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets 971 * batadv_recv_unicast_tvlv - receive and process unicast tvlv packets
1051 * @skb: unicast tvlv packet to process 972 * @skb: unicast tvlv packet to process
@@ -1095,6 +1016,64 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
1095 return ret; 1016 return ret;
1096} 1017}
1097 1018
1019/**
1020 * batadv_recv_frag_packet - process received fragment
1021 * @skb: the received fragment
1022 * @recv_if: interface that the skb is received on
1023 *
1024 * This function does one of the three following things: 1) Forward fragment, if
 1025 * the assembled packet will exceed our MTU; 2) Buffer fragment, if we still
1026 * lack further fragments; 3) Merge fragments, if we have all needed parts.
1027 *
1028 * Return NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
1029 */
1030int batadv_recv_frag_packet(struct sk_buff *skb,
1031 struct batadv_hard_iface *recv_if)
1032{
1033 struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
1034 struct batadv_orig_node *orig_node_src = NULL;
1035 struct batadv_frag_packet *frag_packet;
1036 int ret = NET_RX_DROP;
1037
1038 if (batadv_check_unicast_packet(bat_priv, skb,
1039 sizeof(*frag_packet)) < 0)
1040 goto out;
1041
1042 frag_packet = (struct batadv_frag_packet *)skb->data;
1043 orig_node_src = batadv_orig_hash_find(bat_priv, frag_packet->orig);
1044 if (!orig_node_src)
1045 goto out;
1046
1047 /* Route the fragment if it is not for us and too big to be merged. */
1048 if (!batadv_is_my_mac(bat_priv, frag_packet->dest) &&
1049 batadv_frag_skb_fwd(skb, recv_if, orig_node_src)) {
1050 ret = NET_RX_SUCCESS;
1051 goto out;
1052 }
1053
1054 batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_RX);
1055 batadv_add_counter(bat_priv, BATADV_CNT_FRAG_RX_BYTES, skb->len);
1056
1057 /* Add fragment to buffer and merge if possible. */
1058 if (!batadv_frag_skb_buffer(&skb, orig_node_src))
1059 goto out;
1060
1061 /* Deliver merged packet to the appropriate handler, if it was
1062 * merged
1063 */
1064 if (skb)
1065 batadv_batman_skb_recv(skb, recv_if->net_dev,
1066 &recv_if->batman_adv_ptype, NULL);
1067
1068 ret = NET_RX_SUCCESS;
1069
1070out:
1071 if (orig_node_src)
1072 batadv_orig_node_free_ref(orig_node_src);
1073
1074 return ret;
1075}
1076
1098int batadv_recv_bcast_packet(struct sk_buff *skb, 1077int batadv_recv_bcast_packet(struct sk_buff *skb,
1099 struct batadv_hard_iface *recv_if) 1078 struct batadv_hard_iface *recv_if)
1100{ 1079{
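
Illustration only, not part of the patch: the kernel-doc above gives batadv_recv_frag_packet() a simple ownership rule: the skb is consumed on NET_RX_SUCCESS (buffered, merged or forwarded) and left untouched on NET_RX_DROP. A minimal caller written against that rule:

    static void example_handle_frag(struct sk_buff *skb,
                                    struct batadv_hard_iface *recv_if)
    {
            /* on NET_RX_DROP the fragment was not consumed by the handler,
             * so it has to be freed here
             */
            if (batadv_recv_frag_packet(skb, recv_if) == NET_RX_DROP)
                    kfree_skb(skb);
    }
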
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index ea15fa6302ad..55d637a90621 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -30,8 +30,8 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
30 struct batadv_hard_iface *recv_if); 30 struct batadv_hard_iface *recv_if);
31int batadv_recv_unicast_packet(struct sk_buff *skb, 31int batadv_recv_unicast_packet(struct sk_buff *skb,
32 struct batadv_hard_iface *recv_if); 32 struct batadv_hard_iface *recv_if);
33int batadv_recv_ucast_frag_packet(struct sk_buff *skb, 33int batadv_recv_frag_packet(struct sk_buff *skb,
34 struct batadv_hard_iface *recv_if); 34 struct batadv_hard_iface *iface);
35int batadv_recv_bcast_packet(struct sk_buff *skb, 35int batadv_recv_bcast_packet(struct sk_buff *skb,
36 struct batadv_hard_iface *recv_if); 36 struct batadv_hard_iface *recv_if);
37int batadv_recv_tt_query(struct sk_buff *skb, 37int batadv_recv_tt_query(struct sk_buff *skb,
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 81d69fb97c17..82588e425641 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -25,10 +25,10 @@
25#include "soft-interface.h" 25#include "soft-interface.h"
26#include "hard-interface.h" 26#include "hard-interface.h"
27#include "gateway_common.h" 27#include "gateway_common.h"
28#include "gateway_client.h"
28#include "originator.h" 29#include "originator.h"
29#include "network-coding.h" 30#include "network-coding.h"
30 31#include "fragmentation.h"
31#include <linux/if_ether.h>
32 32
33static void batadv_send_outstanding_bcast_packet(struct work_struct *work); 33static void batadv_send_outstanding_bcast_packet(struct work_struct *work);
34 34
@@ -63,10 +63,10 @@ int batadv_send_skb_packet(struct sk_buff *skb,
63 ethhdr = eth_hdr(skb); 63 ethhdr = eth_hdr(skb);
64 memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); 64 memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN);
65 memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); 65 memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN);
66 ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); 66 ethhdr->h_proto = htons(ETH_P_BATMAN);
67 67
68 skb_set_network_header(skb, ETH_HLEN); 68 skb_set_network_header(skb, ETH_HLEN);
69 skb->protocol = __constant_htons(ETH_P_BATMAN); 69 skb->protocol = htons(ETH_P_BATMAN);
70 70
71 skb->dev = hard_iface->net_dev; 71 skb->dev = hard_iface->net_dev;
72 72
@@ -108,7 +108,19 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
108 /* batadv_find_router() increases neigh_nodes refcount if found. */ 108 /* batadv_find_router() increases neigh_nodes refcount if found. */
109 neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); 109 neigh_node = batadv_find_router(bat_priv, orig_node, recv_if);
110 if (!neigh_node) 110 if (!neigh_node)
111 return ret; 111 goto out;
112
113 /* Check if the skb is too large to send in one piece and fragment
114 * it if needed.
115 */
116 if (atomic_read(&bat_priv->fragmentation) &&
117 skb->len > neigh_node->if_incoming->net_dev->mtu) {
118 /* Fragment and send packet. */
119 if (batadv_frag_send_packet(skb, orig_node, neigh_node))
120 ret = NET_XMIT_SUCCESS;
121
122 goto out;
123 }
112 124
113 /* try to network code the packet, if it is received on an interface 125 /* try to network code the packet, if it is received on an interface
114 * (i.e. being forwarded). If the packet originates from this node or if 126 * (i.e. being forwarded). If the packet originates from this node or if
@@ -122,8 +134,170 @@ int batadv_send_skb_to_orig(struct sk_buff *skb,
122 ret = NET_XMIT_SUCCESS; 134 ret = NET_XMIT_SUCCESS;
123 } 135 }
124 136
125 batadv_neigh_node_free_ref(neigh_node); 137out:
138 if (neigh_node)
139 batadv_neigh_node_free_ref(neigh_node);
140
141 return ret;
142}
126 143
144/**
145 * batadv_send_skb_push_fill_unicast - extend the buffer and initialize the
146 * common fields for unicast packets
147 * @skb: the skb carrying the unicast header to initialize
148 * @hdr_size: amount of bytes to push at the beginning of the skb
149 * @orig_node: the destination node
150 *
151 * Returns false if the buffer extension was not possible or true otherwise.
152 */
153static bool
154batadv_send_skb_push_fill_unicast(struct sk_buff *skb, int hdr_size,
155 struct batadv_orig_node *orig_node)
156{
157 struct batadv_unicast_packet *unicast_packet;
158 uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
159
160 if (batadv_skb_head_push(skb, hdr_size) < 0)
161 return false;
162
163 unicast_packet = (struct batadv_unicast_packet *)skb->data;
164 unicast_packet->header.version = BATADV_COMPAT_VERSION;
165 /* batman packet type: unicast */
166 unicast_packet->header.packet_type = BATADV_UNICAST;
167 /* set unicast ttl */
168 unicast_packet->header.ttl = BATADV_TTL;
169 /* copy the destination for faster routing */
170 memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
171 /* set the destination tt version number */
172 unicast_packet->ttvn = ttvn;
173
174 return true;
175}
176
177/**
178 * batadv_send_skb_prepare_unicast - encapsulate an skb with a unicast header
179 * @skb: the skb containing the payload to encapsulate
180 * @orig_node: the destination node
181 *
182 * Returns false if the payload could not be encapsulated or true otherwise.
183 */
184static bool batadv_send_skb_prepare_unicast(struct sk_buff *skb,
185 struct batadv_orig_node *orig_node)
186{
187 size_t uni_size = sizeof(struct batadv_unicast_packet);
188
189 return batadv_send_skb_push_fill_unicast(skb, uni_size, orig_node);
190}
191
192/**
193 * batadv_send_skb_prepare_unicast_4addr - encapsulate an skb with a
194 * unicast 4addr header
195 * @bat_priv: the bat priv with all the soft interface information
196 * @skb: the skb containing the payload to encapsulate
197 * @orig_node: the destination node
198 * @packet_subtype: the unicast 4addr packet subtype to use
199 *
200 * Returns false if the payload could not be encapsulated or true otherwise.
201 */
202bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
203 struct sk_buff *skb,
204 struct batadv_orig_node *orig,
205 int packet_subtype)
206{
207 struct batadv_hard_iface *primary_if;
208 struct batadv_unicast_4addr_packet *uc_4addr_packet;
209 bool ret = false;
210
211 primary_if = batadv_primary_if_get_selected(bat_priv);
212 if (!primary_if)
213 goto out;
214
215 /* Pull the header space and fill the unicast_packet substructure.
216 * We can do that because the first member of the uc_4addr_packet
217 * is of type struct unicast_packet
218 */
219 if (!batadv_send_skb_push_fill_unicast(skb, sizeof(*uc_4addr_packet),
220 orig))
221 goto out;
222
223 uc_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
224 uc_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR;
225 memcpy(uc_4addr_packet->src, primary_if->net_dev->dev_addr, ETH_ALEN);
226 uc_4addr_packet->subtype = packet_subtype;
227 uc_4addr_packet->reserved = 0;
228
229 ret = true;
230out:
231 if (primary_if)
232 batadv_hardif_free_ref(primary_if);
233 return ret;
234}
235
236/**
237 * batadv_send_skb_generic_unicast - send an skb as unicast
238 * @bat_priv: the bat priv with all the soft interface information
239 * @skb: payload to send
240 * @packet_type: the batman unicast packet type to use
241 * @packet_subtype: the unicast 4addr packet subtype (only relevant for unicast
242 * 4addr packets)
243 *
244 * Returns 1 in case of error or 0 otherwise.
245 */
246int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv,
247 struct sk_buff *skb, int packet_type,
248 int packet_subtype)
249{
250 struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
251 struct batadv_unicast_packet *unicast_packet;
252 struct batadv_orig_node *orig_node;
253 int ret = NET_RX_DROP;
254
255 /* get routing information */
256 if (is_multicast_ether_addr(ethhdr->h_dest))
257 orig_node = batadv_gw_get_selected_orig(bat_priv);
258 else
259 /* check for tt host - increases orig_node refcount.
260 * returns NULL in case of AP isolation
261 */
262 orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
263 ethhdr->h_dest);
264
265 if (!orig_node)
266 goto out;
267
268 switch (packet_type) {
269 case BATADV_UNICAST:
270 batadv_send_skb_prepare_unicast(skb, orig_node);
271 break;
272 case BATADV_UNICAST_4ADDR:
273 batadv_send_skb_prepare_unicast_4addr(bat_priv, skb, orig_node,
274 packet_subtype);
275 break;
276 default:
277 /* this function supports UNICAST and UNICAST_4ADDR only. It
278 * should never be invoked with any other packet type
279 */
280 goto out;
281 }
282
283 unicast_packet = (struct batadv_unicast_packet *)skb->data;
284
285 /* inform the destination node that we are still missing a correct route
286 * for this client. The destination will receive this packet and will
287 * try to reroute it because the ttvn contained in the header is less
288 * than the current one
289 */
290 if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest))
291 unicast_packet->ttvn = unicast_packet->ttvn - 1;
292
293 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
294 ret = 0;
295
296out:
297 if (orig_node)
298 batadv_orig_node_free_ref(orig_node);
299 if (ret == NET_RX_DROP)
300 kfree_skb(skb);
127 return ret; 301 return ret;
128} 302}
129 303
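
A condensed sketch of the fragmentation branch added to batadv_send_skb_to_orig() above (the network-coding path is left out; batadv_frag_send_packet(), pulled in via fragmentation.h, is assumed to return true once all fragments were handed to the outgoing interface, as its use in the hunk implies):

    static int example_xmit_or_fragment(struct batadv_priv *bat_priv,
                                        struct sk_buff *skb,
                                        struct batadv_orig_node *orig_node,
                                        struct batadv_neigh_node *neigh_node)
    {
            unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;

            /* fragment only when enabled and the frame exceeds the MTU of
             * the selected next hop's interface
             */
            if (atomic_read(&bat_priv->fragmentation) && skb->len > mtu)
                    return batadv_frag_send_packet(skb, orig_node, neigh_node) ?
                           NET_XMIT_SUCCESS : NET_XMIT_DROP;

            return batadv_send_skb_packet(skb, neigh_node->if_incoming,
                                          neigh_node->addr);
    }
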
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index e7b17880fca4..ad63184a4dd9 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -34,5 +34,45 @@ void batadv_send_outstanding_bat_ogm_packet(struct work_struct *work);
34void 34void
35batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, 35batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
36 const struct batadv_hard_iface *hard_iface); 36 const struct batadv_hard_iface *hard_iface);
37bool batadv_send_skb_prepare_unicast_4addr(struct batadv_priv *bat_priv,
38 struct sk_buff *skb,
39 struct batadv_orig_node *orig_node,
40 int packet_subtype);
41int batadv_send_skb_generic_unicast(struct batadv_priv *bat_priv,
42 struct sk_buff *skb, int packet_type,
43 int packet_subtype);
44
45
46/**
47 * batadv_send_skb_unicast - send the skb encapsulated in a unicast packet
48 * @bat_priv: the bat priv with all the soft interface information
49 * @skb: the payload to send
50 *
51 * Returns 1 in case of error or 0 otherwise.
52 */
53static inline int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
54 struct sk_buff *skb)
55{
56 return batadv_send_skb_generic_unicast(bat_priv, skb, BATADV_UNICAST,
57 0);
58}
59
60/**
61 * batadv_send_skb_unicast_4addr - send the skb encapsulated in a unicast 4addr
62 * packet
63 * @bat_priv: the bat priv with all the soft interface information
64 * @skb: the payload to send
65 * @packet_subtype: the unicast 4addr packet subtype to use
66 *
67 * Returns 1 in case of error or 0 otherwise.
68 */
69static inline int batadv_send_skb_unicast_4addr(struct batadv_priv *bat_priv,
70 struct sk_buff *skb,
71 int packet_subtype)
72{
73 return batadv_send_skb_generic_unicast(bat_priv, skb,
74 BATADV_UNICAST_4ADDR,
75 packet_subtype);
76}
37 77
38#endif /* _NET_BATMAN_ADV_SEND_H_ */ 78#endif /* _NET_BATMAN_ADV_SEND_H_ */
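
Usage sketch for the two inline wrappers declared above; BATADV_P_DATA is assumed to be the plain-data 4addr subtype defined in packet.h:

    static int example_send_payload(struct batadv_priv *bat_priv,
                                    struct sk_buff *skb, bool four_addr)
    {
            if (four_addr)
                    return batadv_send_skb_unicast_4addr(bat_priv, skb,
                                                         BATADV_P_DATA);

            return batadv_send_skb_unicast(bat_priv, skb);
    }

Both wrappers end up in batadv_send_skb_generic_unicast(), which frees the skb itself on failure, so a caller only has to check the 0/1 return value.
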
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 25e6004e8e01..e8a2bd699d40 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -34,8 +34,6 @@
34#include <linux/ethtool.h> 34#include <linux/ethtool.h>
35#include <linux/etherdevice.h> 35#include <linux/etherdevice.h>
36#include <linux/if_vlan.h> 36#include <linux/if_vlan.h>
37#include <linux/if_ether.h>
38#include "unicast.h"
39#include "bridge_loop_avoidance.h" 37#include "bridge_loop_avoidance.h"
40#include "network-coding.h" 38#include "network-coding.h"
41 39
@@ -139,6 +137,18 @@ static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
139 return 0; 137 return 0;
140} 138}
141 139
140/**
141 * batadv_interface_set_rx_mode - set the rx mode of a device
142 * @dev: registered network device to modify
143 *
144 * We do not actually need to set any rx filters for the virtual batman
145 * soft interface. However, a dummy handler enables a user to set static
146 * multicast listeners, for instance.
147 */
148static void batadv_interface_set_rx_mode(struct net_device *dev)
149{
150}
151
142static int batadv_interface_tx(struct sk_buff *skb, 152static int batadv_interface_tx(struct sk_buff *skb,
143 struct net_device *soft_iface) 153 struct net_device *soft_iface)
144{ 154{
@@ -147,7 +157,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
147 struct batadv_hard_iface *primary_if = NULL; 157 struct batadv_hard_iface *primary_if = NULL;
148 struct batadv_bcast_packet *bcast_packet; 158 struct batadv_bcast_packet *bcast_packet;
149 struct vlan_ethhdr *vhdr; 159 struct vlan_ethhdr *vhdr;
150 __be16 ethertype = __constant_htons(ETH_P_BATMAN); 160 __be16 ethertype = htons(ETH_P_BATMAN);
151 static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 161 static const uint8_t stp_addr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00,
152 0x00, 0x00}; 162 0x00, 0x00};
153 static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00, 163 static const uint8_t ectp_addr[ETH_ALEN] = {0xCF, 0x00, 0x00, 0x00,
@@ -286,7 +296,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
286 296
287 batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb); 297 batadv_dat_snoop_outgoing_arp_reply(bat_priv, skb);
288 298
289 ret = batadv_unicast_send_skb(bat_priv, skb); 299 ret = batadv_send_skb_unicast(bat_priv, skb);
290 if (ret != 0) 300 if (ret != 0)
291 goto dropped_freed; 301 goto dropped_freed;
292 } 302 }
@@ -314,7 +324,7 @@ void batadv_interface_rx(struct net_device *soft_iface,
314 struct vlan_ethhdr *vhdr; 324 struct vlan_ethhdr *vhdr;
315 struct batadv_header *batadv_header = (struct batadv_header *)skb->data; 325 struct batadv_header *batadv_header = (struct batadv_header *)skb->data;
316 unsigned short vid __maybe_unused = BATADV_NO_FLAGS; 326 unsigned short vid __maybe_unused = BATADV_NO_FLAGS;
317 __be16 ethertype = __constant_htons(ETH_P_BATMAN); 327 __be16 ethertype = htons(ETH_P_BATMAN);
318 bool is_bcast; 328 bool is_bcast;
319 329
320 is_bcast = (batadv_header->packet_type == BATADV_BCAST); 330 is_bcast = (batadv_header->packet_type == BATADV_BCAST);
@@ -444,6 +454,7 @@ static void batadv_softif_destroy_finish(struct work_struct *work)
444static int batadv_softif_init_late(struct net_device *dev) 454static int batadv_softif_init_late(struct net_device *dev)
445{ 455{
446 struct batadv_priv *bat_priv; 456 struct batadv_priv *bat_priv;
457 uint32_t random_seqno;
447 int ret; 458 int ret;
448 size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM; 459 size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM;
449 460
@@ -493,6 +504,10 @@ static int batadv_softif_init_late(struct net_device *dev)
493 bat_priv->tt.last_changeset = NULL; 504 bat_priv->tt.last_changeset = NULL;
494 bat_priv->tt.last_changeset_len = 0; 505 bat_priv->tt.last_changeset_len = 0;
495 506
507 /* randomize initial seqno to avoid collision */
508 get_random_bytes(&random_seqno, sizeof(random_seqno));
509 atomic_set(&bat_priv->frag_seqno, random_seqno);
510
496 bat_priv->primary_if = NULL; 511 bat_priv->primary_if = NULL;
497 bat_priv->num_ifaces = 0; 512 bat_priv->num_ifaces = 0;
498 513
@@ -580,6 +595,7 @@ static const struct net_device_ops batadv_netdev_ops = {
580 .ndo_get_stats = batadv_interface_stats, 595 .ndo_get_stats = batadv_interface_stats,
581 .ndo_set_mac_address = batadv_interface_set_mac_addr, 596 .ndo_set_mac_address = batadv_interface_set_mac_addr,
582 .ndo_change_mtu = batadv_interface_change_mtu, 597 .ndo_change_mtu = batadv_interface_change_mtu,
598 .ndo_set_rx_mode = batadv_interface_set_rx_mode,
583 .ndo_start_xmit = batadv_interface_tx, 599 .ndo_start_xmit = batadv_interface_tx,
584 .ndo_validate_addr = eth_validate_addr, 600 .ndo_validate_addr = eth_validate_addr,
585 .ndo_add_slave = batadv_softif_slave_add, 601 .ndo_add_slave = batadv_softif_slave_add,
@@ -623,7 +639,7 @@ static void batadv_softif_init_early(struct net_device *dev)
623 */ 639 */
624 dev->mtu = ETH_DATA_LEN; 640 dev->mtu = ETH_DATA_LEN;
625 /* reserve more space in the skbuff for our header */ 641 /* reserve more space in the skbuff for our header */
626 dev->hard_header_len = BATADV_HEADER_LEN; 642 dev->hard_header_len = batadv_max_header_len();
627 643
628 /* generate random address */ 644 /* generate random address */
629 eth_hw_addr_random(dev); 645 eth_hw_addr_random(dev);
@@ -760,6 +776,12 @@ static const struct {
760 { "mgmt_tx_bytes" }, 776 { "mgmt_tx_bytes" },
761 { "mgmt_rx" }, 777 { "mgmt_rx" },
762 { "mgmt_rx_bytes" }, 778 { "mgmt_rx_bytes" },
779 { "frag_tx" },
780 { "frag_tx_bytes" },
781 { "frag_rx" },
782 { "frag_rx_bytes" },
783 { "frag_fwd" },
784 { "frag_fwd_bytes" },
763 { "tt_request_tx" }, 785 { "tt_request_tx" },
764 { "tt_request_rx" }, 786 { "tt_request_rx" },
765 { "tt_response_tx" }, 787 { "tt_response_tx" },
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index c7416947a4e0..b521afb186d4 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -117,25 +117,17 @@ batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry)
117 kfree_rcu(tt_local_entry, common.rcu); 117 kfree_rcu(tt_local_entry, common.rcu);
118} 118}
119 119
120static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) 120/**
121{ 121 * batadv_tt_global_entry_free_ref - decrement the refcounter for a
122 struct batadv_tt_common_entry *tt_common_entry; 122 * tt_global_entry and possibly free it
123 struct batadv_tt_global_entry *tt_global_entry; 123 * @tt_global_entry: the object to free
124 124 */
125 tt_common_entry = container_of(rcu, struct batadv_tt_common_entry, rcu);
126 tt_global_entry = container_of(tt_common_entry,
127 struct batadv_tt_global_entry, common);
128
129 kfree(tt_global_entry);
130}
131
132static void 125static void
133batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry) 126batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry)
134{ 127{
135 if (atomic_dec_and_test(&tt_global_entry->common.refcount)) { 128 if (atomic_dec_and_test(&tt_global_entry->common.refcount)) {
136 batadv_tt_global_del_orig_list(tt_global_entry); 129 batadv_tt_global_del_orig_list(tt_global_entry);
137 call_rcu(&tt_global_entry->common.rcu, 130 kfree_rcu(tt_global_entry, common.rcu);
138 batadv_tt_global_entry_free_rcu);
139 } 131 }
140} 132}
141 133
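
For reference, kfree_rcu(tt_global_entry, common.rcu) performs, after a grace period, exactly what the removed batadv_tt_global_entry_free_rcu() did by hand; a sketch of that equivalent callback (illustrative only):

    static void example_tt_global_free_rcu(struct rcu_head *rcu)
    {
            struct batadv_tt_common_entry *common;

            common = container_of(rcu, struct batadv_tt_common_entry, rcu);
            kfree(container_of(common, struct batadv_tt_global_entry,
                               common));
    }
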
@@ -240,6 +232,17 @@ static int batadv_tt_len(int changes_num)
240 return changes_num * sizeof(struct batadv_tvlv_tt_change); 232 return changes_num * sizeof(struct batadv_tvlv_tt_change);
241} 233}
242 234
235/**
236 * batadv_tt_entries - compute the number of entries fitting in tt_len bytes
237 * @tt_len: available space
238 *
239 * Returns the number of entries.
240 */
241static uint16_t batadv_tt_entries(uint16_t tt_len)
242{
243 return tt_len / batadv_tt_len(1);
244}
245
243static int batadv_tt_local_init(struct batadv_priv *bat_priv) 246static int batadv_tt_local_init(struct batadv_priv *bat_priv)
244{ 247{
245 if (bat_priv->tt.local_hash) 248 if (bat_priv->tt.local_hash)
@@ -414,7 +417,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv)
414 if (tt_diff_len == 0) 417 if (tt_diff_len == 0)
415 goto container_register; 418 goto container_register;
416 419
417 tt_diff_entries_num = tt_diff_len / batadv_tt_len(1); 420 tt_diff_entries_num = batadv_tt_entries(tt_diff_len);
418 421
419 spin_lock_bh(&bat_priv->tt.changes_list_lock); 422 spin_lock_bh(&bat_priv->tt.changes_list_lock);
420 atomic_set(&bat_priv->tt.local_changes, 0); 423 atomic_set(&bat_priv->tt.local_changes, 0);
@@ -805,15 +808,17 @@ out:
805 * If a TT local entry exists for this non-mesh client remove it. 808 * If a TT local entry exists for this non-mesh client remove it.
806 * 809 *
807 * The caller must hold orig_node refcount. 810 * The caller must hold orig_node refcount.
811 *
812 * Return true if the new entry has been added, false otherwise
808 */ 813 */
809int batadv_tt_global_add(struct batadv_priv *bat_priv, 814static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
810 struct batadv_orig_node *orig_node, 815 struct batadv_orig_node *orig_node,
811 const unsigned char *tt_addr, uint16_t flags, 816 const unsigned char *tt_addr, uint16_t flags,
812 uint8_t ttvn) 817 uint8_t ttvn)
813{ 818{
814 struct batadv_tt_global_entry *tt_global_entry; 819 struct batadv_tt_global_entry *tt_global_entry;
815 struct batadv_tt_local_entry *tt_local_entry; 820 struct batadv_tt_local_entry *tt_local_entry;
816 int ret = 0; 821 bool ret = false;
817 int hash_added; 822 int hash_added;
818 struct batadv_tt_common_entry *common; 823 struct batadv_tt_common_entry *common;
819 uint16_t local_flags; 824 uint16_t local_flags;
@@ -914,7 +919,7 @@ add_orig_entry:
914 batadv_dbg(BATADV_DBG_TT, bat_priv, 919 batadv_dbg(BATADV_DBG_TT, bat_priv,
915 "Creating new global tt entry: %pM (via %pM)\n", 920 "Creating new global tt entry: %pM (via %pM)\n",
916 common->addr, orig_node->orig); 921 common->addr, orig_node->orig);
917 ret = 1; 922 ret = true;
918 923
919out_remove: 924out_remove:
920 925
@@ -1491,11 +1496,9 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
1491 1496
1492static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv, 1497static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv,
1493 struct batadv_orig_node *orig_node, 1498 struct batadv_orig_node *orig_node,
1494 const unsigned char *tt_buff, 1499 const void *tt_buff,
1495 uint16_t tt_num_changes) 1500 uint16_t tt_buff_len)
1496{ 1501{
1497 uint16_t tt_buff_len = batadv_tt_len(tt_num_changes);
1498
1499 /* Replace the old buffer only if I received something in the 1502 /* Replace the old buffer only if I received something in the
1500 * last OGM (the OGM could carry no changes) 1503 * last OGM (the OGM could carry no changes)
1501 */ 1504 */
@@ -1622,7 +1625,7 @@ batadv_tt_tvlv_generate(struct batadv_priv *bat_priv,
1622 tt_len -= tt_len % sizeof(struct batadv_tvlv_tt_change); 1625 tt_len -= tt_len % sizeof(struct batadv_tvlv_tt_change);
1623 } 1626 }
1624 1627
1625 tt_tot = tt_len / sizeof(struct batadv_tvlv_tt_change); 1628 tt_tot = batadv_tt_entries(tt_len);
1626 1629
1627 tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len, 1630 tvlv_tt_data = kzalloc(sizeof(*tvlv_tt_data) + tt_len,
1628 GFP_ATOMIC); 1631 GFP_ATOMIC);
@@ -2032,8 +2035,8 @@ static void batadv_tt_update_changes(struct batadv_priv *bat_priv,
2032 _batadv_tt_update_changes(bat_priv, orig_node, tt_change, 2035 _batadv_tt_update_changes(bat_priv, orig_node, tt_change,
2033 tt_num_changes, ttvn); 2036 tt_num_changes, ttvn);
2034 2037
2035 batadv_tt_save_orig_buffer(bat_priv, orig_node, 2038 batadv_tt_save_orig_buffer(bat_priv, orig_node, tt_change,
2036 (unsigned char *)tt_change, tt_num_changes); 2039 batadv_tt_len(tt_num_changes));
2037 atomic_set(&orig_node->last_ttvn, ttvn); 2040 atomic_set(&orig_node->last_ttvn, ttvn);
2038} 2041}
2039 2042
@@ -2573,7 +2576,7 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
2573 tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; 2576 tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
2574 tvlv_value_len -= sizeof(*tt_data); 2577 tvlv_value_len -= sizeof(*tt_data);
2575 2578
2576 num_entries = tvlv_value_len / batadv_tt_len(1); 2579 num_entries = batadv_tt_entries(tvlv_value_len);
2577 2580
2578 batadv_tt_update_orig(bat_priv, orig, 2581 batadv_tt_update_orig(bat_priv, orig,
2579 (unsigned char *)(tt_data + 1), 2582 (unsigned char *)(tt_data + 1),
@@ -2608,7 +2611,7 @@ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv,
2608 tt_data = (struct batadv_tvlv_tt_data *)tvlv_value; 2611 tt_data = (struct batadv_tvlv_tt_data *)tvlv_value;
2609 tvlv_value_len -= sizeof(*tt_data); 2612 tvlv_value_len -= sizeof(*tt_data);
2610 2613
2611 num_entries = tvlv_value_len / batadv_tt_len(1); 2614 num_entries = batadv_tt_entries(tvlv_value_len);
2612 2615
2613 switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) { 2616 switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) {
2614 case BATADV_TT_REQUEST: 2617 case BATADV_TT_REQUEST:
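
Worked relation between the two helpers used throughout this file (illustrative): batadv_tt_len(n) equals n * sizeof(struct batadv_tvlv_tt_change) and batadv_tt_entries(len) equals len / batadv_tt_len(1), so batadv_tt_entries(batadv_tt_len(n)) == n for any n. A buffer whose length is not an exact multiple of the entry size loses the trailing partial entry to the integer division, which is why batadv_tt_tvlv_generate() first trims tt_len down to a multiple of sizeof(struct batadv_tvlv_tt_change).
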
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index b4b6dea4e2be..015d8b9e63b9 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -27,13 +27,6 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
27 const uint8_t *addr, const char *message, 27 const uint8_t *addr, const char *message,
28 bool roaming); 28 bool roaming);
29int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); 29int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
30void batadv_tt_global_add_orig(struct batadv_priv *bat_priv,
31 struct batadv_orig_node *orig_node,
32 const unsigned char *tt_buff, int tt_buff_len);
33int batadv_tt_global_add(struct batadv_priv *bat_priv,
34 struct batadv_orig_node *orig_node,
35 const unsigned char *addr, uint16_t flags,
36 uint8_t ttvn);
37int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); 30int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
38void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, 31void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
39 struct batadv_orig_node *orig_node, 32 struct batadv_orig_node *orig_node,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 8fbd89d167cd..5cbb0d09a9b5 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -24,13 +24,6 @@
24#include "bitarray.h" 24#include "bitarray.h"
25#include <linux/kernel.h> 25#include <linux/kernel.h>
26 26
27/**
28 * Maximum overhead for the encapsulation for a payload packet
29 */
30#define BATADV_HEADER_LEN \
31 (ETH_HLEN + max(sizeof(struct batadv_unicast_packet), \
32 sizeof(struct batadv_bcast_packet)))
33
34#ifdef CONFIG_BATMAN_ADV_DAT 27#ifdef CONFIG_BATMAN_ADV_DAT
35 28
36/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed, 29/* batadv_dat_addr_t is the type used for all DHT addresses. If it is changed,
@@ -60,7 +53,6 @@ struct batadv_hard_iface_bat_iv {
60 * @if_num: identificator of the interface 53 * @if_num: identificator of the interface
61 * @if_status: status of the interface for batman-adv 54 * @if_status: status of the interface for batman-adv
62 * @net_dev: pointer to the net_device 55 * @net_dev: pointer to the net_device
63 * @frag_seqno: last fragment sequence number sent by this interface
64 * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) 56 * @num_bcasts: number of payload re-broadcasts on this interface (ARQ)
65 * @hardif_obj: kobject of the per interface sysfs "mesh" directory 57 * @hardif_obj: kobject of the per interface sysfs "mesh" directory
66 * @refcount: number of contexts the object is used 58 * @refcount: number of contexts the object is used
@@ -76,7 +68,6 @@ struct batadv_hard_iface {
76 int16_t if_num; 68 int16_t if_num;
77 char if_status; 69 char if_status;
78 struct net_device *net_dev; 70 struct net_device *net_dev;
79 atomic_t frag_seqno;
80 uint8_t num_bcasts; 71 uint8_t num_bcasts;
81 struct kobject *hardif_obj; 72 struct kobject *hardif_obj;
82 atomic_t refcount; 73 atomic_t refcount;
@@ -88,6 +79,34 @@ struct batadv_hard_iface {
88}; 79};
89 80
90/** 81/**
82 * struct batadv_frag_table_entry - head in the fragment buffer table
83 * @head: head of list with fragments
84 * @lock: lock to protect the list of fragments
85 * @timestamp: time (jiffie) of last received fragment
86 * @seqno: sequence number of the fragments in the list
87 * @size: accumulated size of packets in list
88 */
89struct batadv_frag_table_entry {
90 struct hlist_head head;
91 spinlock_t lock; /* protects head */
92 unsigned long timestamp;
93 uint16_t seqno;
94 uint16_t size;
95};
96
97/**
98 * struct batadv_frag_list_entry - entry in a list of fragments
99 * @list: list node information
100 * @skb: fragment
101 * @no: fragment number in the set
102 */
103struct batadv_frag_list_entry {
104 struct hlist_node list;
105 struct sk_buff *skb;
106 uint8_t no;
107};
108
109/**
91 * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh 110 * struct batadv_orig_node - structure for orig_list maintaining nodes of mesh
92 * @orig: originator ethernet address 111 * @orig: originator ethernet address
93 * @primary_addr: hosts primary interface address 112 * @primary_addr: hosts primary interface address
@@ -116,9 +135,6 @@ struct batadv_hard_iface {
116 * last_bcast_seqno) 135 * last_bcast_seqno)
117 * @last_bcast_seqno: last broadcast sequence number received by this host 136 * @last_bcast_seqno: last broadcast sequence number received by this host
118 * @neigh_list: list of potential next hop neighbor towards this orig node 137 * @neigh_list: list of potential next hop neighbor towards this orig node
119 * @frag_list: fragmentation buffer list for fragment re-assembly
120 * @last_frag_packet: time when last fragmented packet from this node was
121 * received
122 * @neigh_list_lock: lock protecting neigh_list, router and bonding_list 138 * @neigh_list_lock: lock protecting neigh_list, router and bonding_list
123 * @hash_entry: hlist node for batadv_priv::orig_hash 139 * @hash_entry: hlist node for batadv_priv::orig_hash
124 * @bat_priv: pointer to soft_iface this orig node belongs to 140 * @bat_priv: pointer to soft_iface this orig node belongs to
@@ -133,6 +149,7 @@ struct batadv_hard_iface {
133 * @out_coding_list: list of nodes that can hear this orig 149 * @out_coding_list: list of nodes that can hear this orig
134 * @in_coding_list_lock: protects in_coding_list 150 * @in_coding_list_lock: protects in_coding_list
135 * @out_coding_list_lock: protects out_coding_list 151 * @out_coding_list_lock: protects out_coding_list
152 * @fragments: array with heads for fragment chains
136 */ 153 */
137struct batadv_orig_node { 154struct batadv_orig_node {
138 uint8_t orig[ETH_ALEN]; 155 uint8_t orig[ETH_ALEN];
@@ -159,8 +176,6 @@ struct batadv_orig_node {
159 DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE); 176 DECLARE_BITMAP(bcast_bits, BATADV_TQ_LOCAL_WINDOW_SIZE);
160 uint32_t last_bcast_seqno; 177 uint32_t last_bcast_seqno;
161 struct hlist_head neigh_list; 178 struct hlist_head neigh_list;
162 struct list_head frag_list;
163 unsigned long last_frag_packet;
164 /* neigh_list_lock protects: neigh_list, router & bonding_list */ 179 /* neigh_list_lock protects: neigh_list, router & bonding_list */
165 spinlock_t neigh_list_lock; 180 spinlock_t neigh_list_lock;
166 struct hlist_node hash_entry; 181 struct hlist_node hash_entry;
@@ -181,6 +196,7 @@ struct batadv_orig_node {
181 spinlock_t in_coding_list_lock; /* Protects in_coding_list */ 196 spinlock_t in_coding_list_lock; /* Protects in_coding_list */
182 spinlock_t out_coding_list_lock; /* Protects out_coding_list */ 197 spinlock_t out_coding_list_lock; /* Protects out_coding_list */
183#endif 198#endif
199 struct batadv_frag_table_entry fragments[BATADV_FRAG_BUFFER_COUNT];
184}; 200};
185 201
186/** 202/**
@@ -277,6 +293,12 @@ struct batadv_bcast_duplist_entry {
277 * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter 293 * @BATADV_CNT_MGMT_TX_BYTES: transmitted routing protocol traffic bytes counter
278 * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter 294 * @BATADV_CNT_MGMT_RX: received routing protocol traffic packet counter
279 * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter 295 * @BATADV_CNT_MGMT_RX_BYTES: received routing protocol traffic bytes counter
296 * @BATADV_CNT_FRAG_TX: transmitted fragment traffic packet counter
297 * @BATADV_CNT_FRAG_TX_BYTES: transmitted fragment traffic bytes counter
298 * @BATADV_CNT_FRAG_RX: received fragment traffic packet counter
299 * @BATADV_CNT_FRAG_RX_BYTES: received fragment traffic bytes counter
300 * @BATADV_CNT_FRAG_FWD: forwarded fragment traffic packet counter
301 * @BATADV_CNT_FRAG_FWD_BYTES: forwarded fragment traffic bytes counter
280 * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter 302 * @BATADV_CNT_TT_REQUEST_TX: transmitted tt req traffic packet counter
281 * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter 303 * @BATADV_CNT_TT_REQUEST_RX: received tt req traffic packet counter
282 * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter 304 * @BATADV_CNT_TT_RESPONSE_TX: transmitted tt resp traffic packet counter
@@ -314,6 +336,12 @@ enum batadv_counters {
314 BATADV_CNT_MGMT_TX_BYTES, 336 BATADV_CNT_MGMT_TX_BYTES,
315 BATADV_CNT_MGMT_RX, 337 BATADV_CNT_MGMT_RX,
316 BATADV_CNT_MGMT_RX_BYTES, 338 BATADV_CNT_MGMT_RX_BYTES,
339 BATADV_CNT_FRAG_TX,
340 BATADV_CNT_FRAG_TX_BYTES,
341 BATADV_CNT_FRAG_RX,
342 BATADV_CNT_FRAG_RX_BYTES,
343 BATADV_CNT_FRAG_FWD,
344 BATADV_CNT_FRAG_FWD_BYTES,
317 BATADV_CNT_TT_REQUEST_TX, 345 BATADV_CNT_TT_REQUEST_TX,
318 BATADV_CNT_TT_REQUEST_RX, 346 BATADV_CNT_TT_REQUEST_RX,
319 BATADV_CNT_TT_RESPONSE_TX, 347 BATADV_CNT_TT_RESPONSE_TX,
@@ -511,6 +539,7 @@ struct batadv_priv_nc {
511 * @aggregated_ogms: bool indicating whether OGM aggregation is enabled 539 * @aggregated_ogms: bool indicating whether OGM aggregation is enabled
512 * @bonding: bool indicating whether traffic bonding is enabled 540 * @bonding: bool indicating whether traffic bonding is enabled
513 * @fragmentation: bool indicating whether traffic fragmentation is enabled 541 * @fragmentation: bool indicating whether traffic fragmentation is enabled
542 * @frag_seqno: incremental counter to identify chains of egress fragments
514 * @ap_isolation: bool indicating whether ap isolation is enabled 543 * @ap_isolation: bool indicating whether ap isolation is enabled
515 * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is 544 * @bridge_loop_avoidance: bool indicating whether bridge loop avoidance is
516 * enabled 545 * enabled
@@ -554,6 +583,7 @@ struct batadv_priv {
554 atomic_t aggregated_ogms; 583 atomic_t aggregated_ogms;
555 atomic_t bonding; 584 atomic_t bonding;
556 atomic_t fragmentation; 585 atomic_t fragmentation;
586 atomic_t frag_seqno;
557 atomic_t ap_isolation; 587 atomic_t ap_isolation;
558#ifdef CONFIG_BATMAN_ADV_BLA 588#ifdef CONFIG_BATMAN_ADV_BLA
559 atomic_t bridge_loop_avoidance; 589 atomic_t bridge_loop_avoidance;
@@ -874,18 +904,6 @@ struct batadv_forw_packet {
874}; 904};
875 905
876/** 906/**
877 * struct batadv_frag_packet_list_entry - storage for fragment packet
878 * @list: list node for orig_node::frag_list
879 * @seqno: sequence number of the fragment
880 * @skb: fragment's skb buffer
881 */
882struct batadv_frag_packet_list_entry {
883 struct list_head list;
884 uint16_t seqno;
885 struct sk_buff *skb;
886};
887
888/**
889 * struct batadv_algo_ops - mesh algorithm callbacks 907 * struct batadv_algo_ops - mesh algorithm callbacks
890 * @list: list node for the batadv_algo_list 908 * @list: list node for the batadv_algo_list
891 * @name: name of the algorithm 909 * @name: name of the algorithm
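
Illustration of how the per-originator fragment buffers added above hang together: each orig_node carries BATADV_FRAG_BUFFER_COUNT chains of struct batadv_frag_table_entry, and every chain links struct batadv_frag_list_entry items through its hlist. A sketch, not part of the patch, that sums the buffered bytes of one chain under its spinlock:

    static unsigned int
    example_chain_bytes(struct batadv_frag_table_entry *chain)
    {
            struct batadv_frag_list_entry *entry;
            unsigned int bytes = 0;

            spin_lock_bh(&chain->lock);
            hlist_for_each_entry(entry, &chain->head, list)
                    bytes += entry->skb->len;
            spin_unlock_bh(&chain->lock);

            return bytes;
    }

In practice the precomputed chain->size field already holds this value; the loop only makes the list layout explicit.
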
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
deleted file mode 100644
index 48b31d33ce6b..000000000000
--- a/net/batman-adv/unicast.c
+++ /dev/null
@@ -1,491 +0,0 @@
1/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
2 *
3 * Andreas Langer
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#include "main.h"
21#include "unicast.h"
22#include "send.h"
23#include "soft-interface.h"
24#include "gateway_client.h"
25#include "originator.h"
26#include "hash.h"
27#include "translation-table.h"
28#include "routing.h"
29#include "hard-interface.h"
30
31
32static struct sk_buff *
33batadv_frag_merge_packet(struct list_head *head,
34 struct batadv_frag_packet_list_entry *tfp,
35 struct sk_buff *skb)
36{
37 struct batadv_unicast_frag_packet *up;
38 struct sk_buff *tmp_skb;
39 struct batadv_unicast_packet *unicast_packet;
40 int hdr_len = sizeof(*unicast_packet);
41 int uni_diff = sizeof(*up) - hdr_len;
42 uint8_t *packet_pos;
43
44 up = (struct batadv_unicast_frag_packet *)skb->data;
45 /* set skb to the first part and tmp_skb to the second part */
46 if (up->flags & BATADV_UNI_FRAG_HEAD) {
47 tmp_skb = tfp->skb;
48 } else {
49 tmp_skb = skb;
50 skb = tfp->skb;
51 }
52
53 if (skb_linearize(skb) < 0 || skb_linearize(tmp_skb) < 0)
54 goto err;
55
56 skb_pull(tmp_skb, sizeof(*up));
57 if (pskb_expand_head(skb, 0, tmp_skb->len, GFP_ATOMIC) < 0)
58 goto err;
59
60 /* move free entry to end */
61 tfp->skb = NULL;
62 tfp->seqno = 0;
63 list_move_tail(&tfp->list, head);
64
65 memcpy(skb_put(skb, tmp_skb->len), tmp_skb->data, tmp_skb->len);
66 kfree_skb(tmp_skb);
67
68 memmove(skb->data + uni_diff, skb->data, hdr_len);
69 packet_pos = skb_pull(skb, uni_diff);
70 unicast_packet = (struct batadv_unicast_packet *)packet_pos;
71 unicast_packet->header.packet_type = BATADV_UNICAST;
72
73 return skb;
74
75err:
76 /* free buffered skb, skb will be freed later */
77 kfree_skb(tfp->skb);
78 return NULL;
79}
80
81static void batadv_frag_create_entry(struct list_head *head,
82 struct sk_buff *skb)
83{
84 struct batadv_frag_packet_list_entry *tfp;
85 struct batadv_unicast_frag_packet *up;
86
87 up = (struct batadv_unicast_frag_packet *)skb->data;
88
89 /* free and oldest packets stand at the end */
90 tfp = list_entry((head)->prev, typeof(*tfp), list);
91 kfree_skb(tfp->skb);
92
93 tfp->seqno = ntohs(up->seqno);
94 tfp->skb = skb;
95 list_move(&tfp->list, head);
96 return;
97}
98
99static int batadv_frag_create_buffer(struct list_head *head)
100{
101 int i;
102 struct batadv_frag_packet_list_entry *tfp;
103
104 for (i = 0; i < BATADV_FRAG_BUFFER_SIZE; i++) {
105 tfp = kmalloc(sizeof(*tfp), GFP_ATOMIC);
106 if (!tfp) {
107 batadv_frag_list_free(head);
108 return -ENOMEM;
109 }
110 tfp->skb = NULL;
111 tfp->seqno = 0;
112 INIT_LIST_HEAD(&tfp->list);
113 list_add(&tfp->list, head);
114 }
115
116 return 0;
117}
118
119static struct batadv_frag_packet_list_entry *
120batadv_frag_search_packet(struct list_head *head,
121 const struct batadv_unicast_frag_packet *up)
122{
123 struct batadv_frag_packet_list_entry *tfp;
124 struct batadv_unicast_frag_packet *tmp_up = NULL;
125 bool is_head_tmp, is_head;
126 uint16_t search_seqno;
127
128 if (up->flags & BATADV_UNI_FRAG_HEAD)
129 search_seqno = ntohs(up->seqno)+1;
130 else
131 search_seqno = ntohs(up->seqno)-1;
132
133 is_head = up->flags & BATADV_UNI_FRAG_HEAD;
134
135 list_for_each_entry(tfp, head, list) {
136 if (!tfp->skb)
137 continue;
138
139 if (tfp->seqno == ntohs(up->seqno))
140 goto mov_tail;
141
142 tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data;
143
144 if (tfp->seqno == search_seqno) {
145 is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD;
146 if (is_head_tmp != is_head)
147 return tfp;
148 else
149 goto mov_tail;
150 }
151 }
152 return NULL;
153
154mov_tail:
155 list_move_tail(&tfp->list, head);
156 return NULL;
157}
158
159void batadv_frag_list_free(struct list_head *head)
160{
161 struct batadv_frag_packet_list_entry *pf, *tmp_pf;
162
163 if (!list_empty(head)) {
164 list_for_each_entry_safe(pf, tmp_pf, head, list) {
165 kfree_skb(pf->skb);
166 list_del(&pf->list);
167 kfree(pf);
168 }
169 }
170 return;
171}
172
173/* frag_reassemble_skb():
174 * returns NET_RX_DROP if the operation failed - skb is left intact
175 * returns NET_RX_SUCCESS if the fragment was buffered (skb_new will be NULL)
176 * or the skb could be reassembled (skb_new will point to the new packet and
177 * skb was freed)
178 */
179int batadv_frag_reassemble_skb(struct sk_buff *skb,
180 struct batadv_priv *bat_priv,
181 struct sk_buff **new_skb)
182{
183 struct batadv_orig_node *orig_node;
184 struct batadv_frag_packet_list_entry *tmp_frag_entry;
185 int ret = NET_RX_DROP;
186 struct batadv_unicast_frag_packet *unicast_packet;
187
188 unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
189 *new_skb = NULL;
190
191 orig_node = batadv_orig_hash_find(bat_priv, unicast_packet->orig);
192 if (!orig_node)
193 goto out;
194
195 orig_node->last_frag_packet = jiffies;
196
197 if (list_empty(&orig_node->frag_list) &&
198 batadv_frag_create_buffer(&orig_node->frag_list)) {
199 pr_debug("couldn't create frag buffer\n");
200 goto out;
201 }
202
203 tmp_frag_entry = batadv_frag_search_packet(&orig_node->frag_list,
204 unicast_packet);
205
206 if (!tmp_frag_entry) {
207 batadv_frag_create_entry(&orig_node->frag_list, skb);
208 ret = NET_RX_SUCCESS;
209 goto out;
210 }
211
212 *new_skb = batadv_frag_merge_packet(&orig_node->frag_list,
213 tmp_frag_entry, skb);
214 /* if not, merge failed */
215 if (*new_skb)
216 ret = NET_RX_SUCCESS;
217
218out:
219 if (orig_node)
220 batadv_orig_node_free_ref(orig_node);
221 return ret;
222}
223
224int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
225 struct batadv_hard_iface *hard_iface,
226 const uint8_t dstaddr[])
227{
228 struct batadv_unicast_packet tmp_uc, *unicast_packet;
229 struct batadv_hard_iface *primary_if;
230 struct sk_buff *frag_skb;
231 struct batadv_unicast_frag_packet *frag1, *frag2;
232 int uc_hdr_len = sizeof(*unicast_packet);
233 int ucf_hdr_len = sizeof(*frag1);
234 int data_len = skb->len - uc_hdr_len;
235 int large_tail = 0, ret = NET_RX_DROP;
236 uint16_t seqno;
237
238 primary_if = batadv_primary_if_get_selected(bat_priv);
239 if (!primary_if)
240 goto dropped;
241
242 frag_skb = dev_alloc_skb(data_len - (data_len / 2) + ucf_hdr_len);
243 if (!frag_skb)
244 goto dropped;
245
246 skb->priority = TC_PRIO_CONTROL;
247 skb_reserve(frag_skb, ucf_hdr_len);
248
249 unicast_packet = (struct batadv_unicast_packet *)skb->data;
250 memcpy(&tmp_uc, unicast_packet, uc_hdr_len);
251 skb_split(skb, frag_skb, data_len / 2 + uc_hdr_len);
252
253 if (batadv_skb_head_push(skb, ucf_hdr_len - uc_hdr_len) < 0 ||
254 batadv_skb_head_push(frag_skb, ucf_hdr_len) < 0)
255 goto drop_frag;
256
257 frag1 = (struct batadv_unicast_frag_packet *)skb->data;
258 frag2 = (struct batadv_unicast_frag_packet *)frag_skb->data;
259
260 memcpy(frag1, &tmp_uc, sizeof(tmp_uc));
261
262 frag1->header.ttl--;
263 frag1->header.version = BATADV_COMPAT_VERSION;
264 frag1->header.packet_type = BATADV_UNICAST_FRAG;
265
266 memcpy(frag1->orig, primary_if->net_dev->dev_addr, ETH_ALEN);
267 memcpy(frag2, frag1, sizeof(*frag2));
268
269 if (data_len & 1)
270 large_tail = BATADV_UNI_FRAG_LARGETAIL;
271
272 frag1->flags = BATADV_UNI_FRAG_HEAD | large_tail;
273 frag2->flags = large_tail;
274
275 seqno = atomic_add_return(2, &hard_iface->frag_seqno);
276 frag1->seqno = htons(seqno - 1);
277 frag2->seqno = htons(seqno);
278
279 batadv_send_skb_packet(skb, hard_iface, dstaddr);
280 batadv_send_skb_packet(frag_skb, hard_iface, dstaddr);
281 ret = NET_RX_SUCCESS;
282 goto out;
283
284drop_frag:
285 kfree_skb(frag_skb);
286dropped:
287 kfree_skb(skb);
288out:
289 if (primary_if)
290 batadv_hardif_free_ref(primary_if);
291 return ret;
292}
293
294/**
295 * batadv_unicast_push_and_fill_skb - extends the buffer and initializes the
296 * common fields for unicast packets
297 * @skb: packet
298 * @hdr_size: amount of bytes to push at the beginning of the skb
299 * @orig_node: the destination node
300 *
301 * Returns false if the buffer extension was not possible or true otherwise
302 */
303static bool batadv_unicast_push_and_fill_skb(struct sk_buff *skb, int hdr_size,
304 struct batadv_orig_node *orig_node)
305{
306 struct batadv_unicast_packet *unicast_packet;
307 uint8_t ttvn = (uint8_t)atomic_read(&orig_node->last_ttvn);
308
309 if (batadv_skb_head_push(skb, hdr_size) < 0)
310 return false;
311
312 unicast_packet = (struct batadv_unicast_packet *)skb->data;
313 unicast_packet->header.version = BATADV_COMPAT_VERSION;
314 /* batman packet type: unicast */
315 unicast_packet->header.packet_type = BATADV_UNICAST;
316 /* set unicast ttl */
317 unicast_packet->header.ttl = BATADV_TTL;
318 /* copy the destination for faster routing */
319 memcpy(unicast_packet->dest, orig_node->orig, ETH_ALEN);
320 /* set the destination tt version number */
321 unicast_packet->ttvn = ttvn;
322
323 return true;
324}
325
326/**
327 * batadv_unicast_prepare_skb - encapsulate an skb with a unicast header
328 * @skb: the skb containing the payload to encapsulate
329 * @orig_node: the destination node
330 *
331 * Returns false if the payload could not be encapsulated or true otherwise.
332 *
333 * This call might reallocate skb data.
334 */
335static bool batadv_unicast_prepare_skb(struct sk_buff *skb,
336 struct batadv_orig_node *orig_node)
337{
338 size_t uni_size = sizeof(struct batadv_unicast_packet);
339 return batadv_unicast_push_and_fill_skb(skb, uni_size, orig_node);
340}
341
342/**
343 * batadv_unicast_4addr_prepare_skb - encapsulate an skb with a unicast4addr
344 * header
345 * @bat_priv: the bat priv with all the soft interface information
346 * @skb: the skb containing the payload to encapsulate
347 * @orig_node: the destination node
348 * @packet_subtype: the batman 4addr packet subtype to use
349 *
350 * Returns false if the payload could not be encapsulated or true otherwise.
351 *
352 * This call might reallocate skb data.
353 */
354bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
355 struct sk_buff *skb,
356 struct batadv_orig_node *orig,
357 int packet_subtype)
358{
359 struct batadv_hard_iface *primary_if;
360 struct batadv_unicast_4addr_packet *unicast_4addr_packet;
361 bool ret = false;
362
363 primary_if = batadv_primary_if_get_selected(bat_priv);
364 if (!primary_if)
365 goto out;
366
367 /* pull the header space and fill the unicast_packet substructure.
368 * We can do that because the first member of the unicast_4addr_packet
369 * is of type struct unicast_packet
370 */
371 if (!batadv_unicast_push_and_fill_skb(skb,
372 sizeof(*unicast_4addr_packet),
373 orig))
374 goto out;
375
376 unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data;
377 unicast_4addr_packet->u.header.packet_type = BATADV_UNICAST_4ADDR;
378 memcpy(unicast_4addr_packet->src, primary_if->net_dev->dev_addr,
379 ETH_ALEN);
380 unicast_4addr_packet->subtype = packet_subtype;
381 unicast_4addr_packet->reserved = 0;
382
383 ret = true;
384out:
385 if (primary_if)
386 batadv_hardif_free_ref(primary_if);
387 return ret;
388}
389
390/**
391 * batadv_unicast_generic_send_skb - send an skb as unicast
392 * @bat_priv: the bat priv with all the soft interface information
393 * @skb: payload to send
394 * @packet_type: the batman unicast packet type to use
395 * @packet_subtype: the batman packet subtype. It is ignored if packet_type is
396 * not BATADV_UNICAT_4ADDR
397 *
398 * Returns 1 in case of error or 0 otherwise
399 */
400int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
401 struct sk_buff *skb, int packet_type,
402 int packet_subtype)
403{
404 struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
405 struct batadv_unicast_packet *unicast_packet;
406 struct batadv_orig_node *orig_node;
407 struct batadv_neigh_node *neigh_node;
408 int data_len = skb->len;
409 int ret = NET_RX_DROP;
410 unsigned int dev_mtu, header_len;
411
412 /* get routing information */
413 if (is_multicast_ether_addr(ethhdr->h_dest)) {
414 orig_node = batadv_gw_get_selected_orig(bat_priv);
415 if (orig_node)
416 goto find_router;
417 }
418
419 /* check for tt host - increases orig_node refcount.
420 * returns NULL in case of AP isolation
421 */
422 orig_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
423 ethhdr->h_dest);
424
425find_router:
426 /* find_router():
427 * - if orig_node is NULL it returns NULL
428 * - increases neigh_nodes refcount if found.
429 */
430 neigh_node = batadv_find_router(bat_priv, orig_node, NULL);
431
432 if (!neigh_node)
433 goto out;
434
435 switch (packet_type) {
436 case BATADV_UNICAST:
437 if (!batadv_unicast_prepare_skb(skb, orig_node))
438 goto out;
439
440 header_len = sizeof(struct batadv_unicast_packet);
441 break;
442 case BATADV_UNICAST_4ADDR:
443 if (!batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
444 packet_subtype))
445 goto out;
446
447 header_len = sizeof(struct batadv_unicast_4addr_packet);
448 break;
449 default:
450 /* this function supports UNICAST and UNICAST_4ADDR only. It
451 * should never be invoked with any other packet type
452 */
453 goto out;
454 }
455
456 ethhdr = (struct ethhdr *)(skb->data + header_len);
457 unicast_packet = (struct batadv_unicast_packet *)skb->data;
458
459 /* inform the destination node that we are still missing a correct route
460 * for this client. The destination will receive this packet and will
461 * try to reroute it because the ttvn contained in the header is less
462 * than the current one
463 */
464 if (batadv_tt_global_client_is_roaming(bat_priv, ethhdr->h_dest))
465 unicast_packet->ttvn = unicast_packet->ttvn - 1;
466
467 dev_mtu = neigh_node->if_incoming->net_dev->mtu;
468 /* fragmentation mechanism only works for UNICAST (now) */
469 if (packet_type == BATADV_UNICAST &&
470 atomic_read(&bat_priv->fragmentation) &&
471 data_len + sizeof(*unicast_packet) > dev_mtu) {
472 /* send frag skb decreases ttl */
473 unicast_packet->header.ttl++;
474 ret = batadv_frag_send_skb(skb, bat_priv,
475 neigh_node->if_incoming,
476 neigh_node->addr);
477 goto out;
478 }
479
480 if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP)
481 ret = 0;
482
483out:
484 if (neigh_node)
485 batadv_neigh_node_free_ref(neigh_node);
486 if (orig_node)
487 batadv_orig_node_free_ref(orig_node);
488 if (ret == NET_RX_DROP)
489 kfree_skb(skb);
490 return ret;
491}
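
A brief worked example of the removed two-way split in batadv_frag_send_skb(), for contrast with the new scheme (numbers are illustrative): with data_len = 1401 payload bytes, skb_split() keeps data_len / 2 = 700 bytes in the original skb and moves 701 bytes into frag_skb; data_len & 1 sets BATADV_UNI_FRAG_LARGETAIL so the receiver knows the tail half is one byte longer; and a single atomic_add_return(2, &hard_iface->frag_seqno) gives the pair the consecutive sequence numbers seqno - 1 and seqno. The old code could therefore only ever split a packet in two, which the new fragmentation code generalizes.
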
diff --git a/net/batman-adv/unicast.h b/net/batman-adv/unicast.h
deleted file mode 100644
index 429cf8a4a31e..000000000000
--- a/net/batman-adv/unicast.h
+++ /dev/null
@@ -1,92 +0,0 @@
1/* Copyright (C) 2010-2013 B.A.T.M.A.N. contributors:
2 *
3 * Andreas Langer
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of version 2 of the GNU General Public
7 * License as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301, USA
18 */
19
20#ifndef _NET_BATMAN_ADV_UNICAST_H_
21#define _NET_BATMAN_ADV_UNICAST_H_
22
23#include "packet.h"
24
25#define BATADV_FRAG_TIMEOUT 10000 /* purge frag list entries after time in ms */
26#define BATADV_FRAG_BUFFER_SIZE 6 /* number of list elements in buffer */
27
28int batadv_frag_reassemble_skb(struct sk_buff *skb,
29 struct batadv_priv *bat_priv,
30 struct sk_buff **new_skb);
31void batadv_frag_list_free(struct list_head *head);
32int batadv_frag_send_skb(struct sk_buff *skb, struct batadv_priv *bat_priv,
33 struct batadv_hard_iface *hard_iface,
34 const uint8_t dstaddr[]);
35bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
36 struct sk_buff *skb,
37 struct batadv_orig_node *orig_node,
38 int packet_subtype);
39int batadv_unicast_generic_send_skb(struct batadv_priv *bat_priv,
40 struct sk_buff *skb, int packet_type,
41 int packet_subtype);
42
43
44/**
45 * batadv_unicast_send_skb - send the skb encapsulated in a unicast packet
46 * @bat_priv: the bat priv with all the soft interface information
47 * @skb: the payload to send
48 */
49static inline int batadv_unicast_send_skb(struct batadv_priv *bat_priv,
50 struct sk_buff *skb)
51{
52 return batadv_unicast_generic_send_skb(bat_priv, skb, BATADV_UNICAST,
53 0);
54}
55
56/**
57 * batadv_unicast_send_skb - send the skb encapsulated in a unicast4addr packet
58 * @bat_priv: the bat priv with all the soft interface information
59 * @skb: the payload to send
60 * @packet_subtype: the batman 4addr packet subtype to use
61 */
62static inline int batadv_unicast_4addr_send_skb(struct batadv_priv *bat_priv,
63 struct sk_buff *skb,
64 int packet_subtype)
65{
66 return batadv_unicast_generic_send_skb(bat_priv, skb,
67 BATADV_UNICAST_4ADDR,
68 packet_subtype);
69}
70
71static inline int batadv_frag_can_reassemble(const struct sk_buff *skb, int mtu)
72{
73 const struct batadv_unicast_frag_packet *unicast_packet;
74 int uneven_correction = 0;
75 unsigned int merged_size;
76
77 unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
78
79 if (unicast_packet->flags & BATADV_UNI_FRAG_LARGETAIL) {
80 if (unicast_packet->flags & BATADV_UNI_FRAG_HEAD)
81 uneven_correction = 1;
82 else
83 uneven_correction = -1;
84 }
85
86 merged_size = (skb->len - sizeof(*unicast_packet)) * 2;
87 merged_size += sizeof(struct batadv_unicast_packet) + uneven_correction;
88
89 return merged_size <= mtu;
90}
91
92#endif /* _NET_BATMAN_ADV_UNICAST_H_ */
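
For completeness, the arithmetic behind the removed batadv_frag_can_reassemble() helper: a fragment of length skb->len carries skb->len - sizeof(struct batadv_unicast_frag_packet) payload bytes, and since the sender split the payload almost evenly,

    merged_size = 2 * (skb->len - sizeof(frag header))
                  + sizeof(struct batadv_unicast_packet) + uneven_correction

bounds the size of the reassembled packet from a single fragment, with uneven_correction at +1 or -1 when BATADV_UNI_FRAG_LARGETAIL marks an odd-sized payload. The helper simply compares that bound against the interface MTU.
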
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index f87736270eaa..878f008afefa 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -619,7 +619,7 @@ bad:
619 619
620/* Replicate the checks that IPv6 does on packet reception and pass the packet 620/* Replicate the checks that IPv6 does on packet reception and pass the packet
621 * to ip6tables, which doesn't support NAT, so things are fairly simple. */ 621 * to ip6tables, which doesn't support NAT, so things are fairly simple. */
622static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, 622static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
623 struct sk_buff *skb, 623 struct sk_buff *skb,
624 const struct net_device *in, 624 const struct net_device *in,
625 const struct net_device *out, 625 const struct net_device *out,
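
The br_netfilter hunks in this file all make the same mechanical change: each hook now receives a const struct nf_hook_ops * instead of the bare hook number. A sketch of the resulting hook shape (illustrative; a hook that still needs the hook point reads it from ops->hooknum):

    static unsigned int example_bridge_hook(const struct nf_hook_ops *ops,
                                            struct sk_buff *skb,
                                            const struct net_device *in,
                                            const struct net_device *out,
                                            int (*okfn)(struct sk_buff *))
    {
            /* the hook point is no longer passed as its own parameter */
            if (ops->hooknum != NF_BR_PRE_ROUTING)
                    return NF_ACCEPT;

            /* per-hook processing of skb would go here */
            return NF_ACCEPT;
    }
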
@@ -669,7 +669,8 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
669 * receiving device) to make netfilter happy, the REDIRECT 669 * receiving device) to make netfilter happy, the REDIRECT
670 * target in particular. Save the original destination IP 670 * target in particular. Save the original destination IP
671 * address to be able to detect DNAT afterwards. */ 671 * address to be able to detect DNAT afterwards. */
672static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, 672static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
673 struct sk_buff *skb,
673 const struct net_device *in, 674 const struct net_device *in,
674 const struct net_device *out, 675 const struct net_device *out,
675 int (*okfn)(struct sk_buff *)) 676 int (*okfn)(struct sk_buff *))
@@ -691,7 +692,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
691 return NF_ACCEPT; 692 return NF_ACCEPT;
692 693
693 nf_bridge_pull_encap_header_rcsum(skb); 694 nf_bridge_pull_encap_header_rcsum(skb);
694 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); 695 return br_nf_pre_routing_ipv6(ops, skb, in, out, okfn);
695 } 696 }
696 697
697 if (!brnf_call_iptables && !br->nf_call_iptables) 698 if (!brnf_call_iptables && !br->nf_call_iptables)
@@ -727,7 +728,8 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
727 * took place when the packet entered the bridge), but we 728 * took place when the packet entered the bridge), but we
728 * register an IPv4 PRE_ROUTING 'sabotage' hook that will 729 * register an IPv4 PRE_ROUTING 'sabotage' hook that will
729 * prevent this from happening. */ 730 * prevent this from happening. */
730static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, 731static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
732 struct sk_buff *skb,
731 const struct net_device *in, 733 const struct net_device *in,
732 const struct net_device *out, 734 const struct net_device *out,
733 int (*okfn)(struct sk_buff *)) 735 int (*okfn)(struct sk_buff *))
@@ -765,7 +767,8 @@ static int br_nf_forward_finish(struct sk_buff *skb)
765 * but we are still able to filter on the 'real' indev/outdev 767 * but we are still able to filter on the 'real' indev/outdev
766 * because of the physdev module. For ARP, indev and outdev are the 768 * because of the physdev module. For ARP, indev and outdev are the
767 * bridge ports. */ 769 * bridge ports. */
768static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, 770static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
771 struct sk_buff *skb,
769 const struct net_device *in, 772 const struct net_device *in,
770 const struct net_device *out, 773 const struct net_device *out,
771 int (*okfn)(struct sk_buff *)) 774 int (*okfn)(struct sk_buff *))
@@ -818,7 +821,8 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
818 return NF_STOLEN; 821 return NF_STOLEN;
819} 822}
820 823
821static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, 824static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
825 struct sk_buff *skb,
822 const struct net_device *in, 826 const struct net_device *in,
823 const struct net_device *out, 827 const struct net_device *out,
824 int (*okfn)(struct sk_buff *)) 828 int (*okfn)(struct sk_buff *))
@@ -878,7 +882,8 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
878#endif 882#endif
879 883
880/* PF_BRIDGE/POST_ROUTING ********************************************/ 884/* PF_BRIDGE/POST_ROUTING ********************************************/
881static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, 885static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
886 struct sk_buff *skb,
882 const struct net_device *in, 887 const struct net_device *in,
883 const struct net_device *out, 888 const struct net_device *out,
884 int (*okfn)(struct sk_buff *)) 889 int (*okfn)(struct sk_buff *))
@@ -923,7 +928,8 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
923/* IP/SABOTAGE *****************************************************/ 928/* IP/SABOTAGE *****************************************************/
924/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING 929/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
925 * for the second time. */ 930 * for the second time. */
926static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, 931static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
932 struct sk_buff *skb,
927 const struct net_device *in, 933 const struct net_device *in,
928 const struct net_device *out, 934 const struct net_device *out,
929 int (*okfn)(struct sk_buff *)) 935 int (*okfn)(struct sk_buff *))
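
The hunks above switch every bridge-netfilter hook from the old "unsigned int hook" first argument to the registered nf_hook_ops, so the hook number is now read from ops->hooknum. A minimal sketch of a hook written against this prototype, assuming the nf_register_hook()/nf_unregister_hook() API of this kernel generation (it is not part of this series), could look like:

/* Illustrative module only: shows the post-conversion hook prototype. */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>

static unsigned int demo_hook(const struct nf_hook_ops *ops,
			      struct sk_buff *skb,
			      const struct net_device *in,
			      const struct net_device *out,
			      int (*okfn)(struct sk_buff *))
{
	/* The hook number now comes from the registered ops. */
	pr_debug("packet seen on hook %u\n", ops->hooknum);
	return NF_ACCEPT;
}

static struct nf_hook_ops demo_ops = {
	.hook     = demo_hook,
	.owner    = THIS_MODULE,
	.pf       = NFPROTO_IPV4,
	.hooknum  = NF_INET_PRE_ROUTING,
	.priority = NF_IP_PRI_FIRST,
};

static int __init demo_init(void)
{
	return nf_register_hook(&demo_ops);
}

static void __exit demo_exit(void)
{
	nf_unregister_hook(&demo_ops);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
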
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index a9aff9c7d027..68f8128147be 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -1,6 +1,9 @@
1# 1#
2# Bridge netfilter configuration 2# Bridge netfilter configuration
3# 3#
4#
5config NF_TABLES_BRIDGE
6 tristate "Ethernet Bridge nf_tables support"
4 7
5menuconfig BRIDGE_NF_EBTABLES 8menuconfig BRIDGE_NF_EBTABLES
6 tristate "Ethernet Bridge tables (ebtables) support" 9 tristate "Ethernet Bridge tables (ebtables) support"
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 0718699540b0..ea7629f58b3d 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -2,6 +2,8 @@
2# Makefile for the netfilter modules for Link Layer filtering on a bridge. 2# Makefile for the netfilter modules for Link Layer filtering on a bridge.
3# 3#
4 4
5obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
6
5obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o 7obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
6 8
7# tables 9# tables
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 94b2b700cff8..bb2da7b706e7 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -60,17 +60,21 @@ static const struct ebt_table frame_filter =
60}; 60};
61 61
62static unsigned int 62static unsigned int
63ebt_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, 63ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
64 const struct net_device *out, int (*okfn)(struct sk_buff *)) 64 const struct net_device *in, const struct net_device *out,
65 int (*okfn)(struct sk_buff *))
65{ 66{
66 return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_filter); 67 return ebt_do_table(ops->hooknum, skb, in, out,
68 dev_net(in)->xt.frame_filter);
67} 69}
68 70
69static unsigned int 71static unsigned int
70ebt_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, 72ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
71 const struct net_device *out, int (*okfn)(struct sk_buff *)) 73 const struct net_device *in, const struct net_device *out,
74 int (*okfn)(struct sk_buff *))
72{ 75{
73 return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_filter); 76 return ebt_do_table(ops->hooknum, skb, in, out,
77 dev_net(out)->xt.frame_filter);
74} 78}
75 79
76static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { 80static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 322555acdd40..bd238f1f105b 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -60,17 +60,21 @@ static struct ebt_table frame_nat =
60}; 60};
61 61
62static unsigned int 62static unsigned int
63ebt_nat_in(unsigned int hook, struct sk_buff *skb, const struct net_device *in 63ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
64 , const struct net_device *out, int (*okfn)(struct sk_buff *)) 64 const struct net_device *in, const struct net_device *out,
65 int (*okfn)(struct sk_buff *))
65{ 66{
66 return ebt_do_table(hook, skb, in, out, dev_net(in)->xt.frame_nat); 67 return ebt_do_table(ops->hooknum, skb, in, out,
68 dev_net(in)->xt.frame_nat);
67} 69}
68 70
69static unsigned int 71static unsigned int
70ebt_nat_out(unsigned int hook, struct sk_buff *skb, const struct net_device *in 72ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
71 , const struct net_device *out, int (*okfn)(struct sk_buff *)) 73 const struct net_device *in, const struct net_device *out,
74 int (*okfn)(struct sk_buff *))
72{ 75{
73 return ebt_do_table(hook, skb, in, out, dev_net(out)->xt.frame_nat); 76 return ebt_do_table(ops->hooknum, skb, in, out,
77 dev_net(out)->xt.frame_nat);
74} 78}
75 79
76static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { 80static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
new file mode 100644
index 000000000000..e8cb016fa34d
--- /dev/null
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -0,0 +1,65 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/netfilter_bridge.h>
14#include <net/netfilter/nf_tables.h>
15
16static struct nft_af_info nft_af_bridge __read_mostly = {
17 .family = NFPROTO_BRIDGE,
18 .nhooks = NF_BR_NUMHOOKS,
19 .owner = THIS_MODULE,
20};
21
22static int nf_tables_bridge_init_net(struct net *net)
23{
24 net->nft.bridge = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
25 if (net->nft.bridge == NULL)
26 return -ENOMEM;
27
28 memcpy(net->nft.bridge, &nft_af_bridge, sizeof(nft_af_bridge));
29
30 if (nft_register_afinfo(net, net->nft.bridge) < 0)
31 goto err;
32
33 return 0;
34err:
35 kfree(net->nft.bridge);
36 return -ENOMEM;
37}
38
39static void nf_tables_bridge_exit_net(struct net *net)
40{
41 nft_unregister_afinfo(net->nft.bridge);
42 kfree(net->nft.bridge);
43}
44
45static struct pernet_operations nf_tables_bridge_net_ops = {
46 .init = nf_tables_bridge_init_net,
47 .exit = nf_tables_bridge_exit_net,
48};
49
50static int __init nf_tables_bridge_init(void)
51{
52 return register_pernet_subsys(&nf_tables_bridge_net_ops);
53}
54
55static void __exit nf_tables_bridge_exit(void)
56{
57 return unregister_pernet_subsys(&nf_tables_bridge_net_ops);
58}
59
60module_init(nf_tables_bridge_init);
61module_exit(nf_tables_bridge_exit);
62
63MODULE_LICENSE("GPL");
64MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
65MODULE_ALIAS_NFT_FAMILY(AF_BRIDGE);
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index 2a7efe388344..e83015cecfa7 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
87} 87}
88 88
89 89
90static unsigned int dnrmg_hook(unsigned int hook, 90static unsigned int dnrmg_hook(const struct nf_hook_ops *ops,
91 struct sk_buff *skb, 91 struct sk_buff *skb,
92 const struct net_device *in, 92 const struct net_device *in,
93 const struct net_device *out, 93 const struct net_device *out,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 41e1c3ea8b51..56a964a553d2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -336,12 +336,9 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
336 err = 0; 336 err = 0;
337 337
338out: 338out:
339 if (sk) { 339 if (sk)
340 if (sk->sk_state == TCP_TIME_WAIT) 340 sock_gen_put(sk);
341 inet_twsk_put((struct inet_timewait_sock *)sk); 341
342 else
343 sock_put(sk);
344 }
345out_nosk: 342out_nosk:
346 return err; 343 return err;
347} 344}
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1657e39b291f..40d56073cd19 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,6 +36,27 @@ config NF_CONNTRACK_PROC_COMPAT
36 36
37 If unsure, say Y. 37 If unsure, say Y.
38 38
39config NF_TABLES_IPV4
40 depends on NF_TABLES
41 tristate "IPv4 nf_tables support"
42
43config NFT_REJECT_IPV4
44 depends on NF_TABLES_IPV4
45 tristate "nf_tables IPv4 reject support"
46
47config NFT_CHAIN_ROUTE_IPV4
48 depends on NF_TABLES_IPV4
49 tristate "IPv4 nf_tables route chain support"
50
51config NFT_CHAIN_NAT_IPV4
52 depends on NF_TABLES_IPV4
53 depends on NF_NAT_IPV4 && NFT_NAT
54 tristate "IPv4 nf_tables nat chain support"
55
56config NF_TABLES_ARP
57 depends on NF_TABLES
58 tristate "ARP nf_tables support"
59
39config IP_NF_IPTABLES 60config IP_NF_IPTABLES
40 tristate "IP tables support (required for filtering/masq/NAT)" 61 tristate "IP tables support (required for filtering/masq/NAT)"
41 default m if NETFILTER_ADVANCED=n 62 default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 3622b248b6dd..19df72b7ba88 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,6 +27,12 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
27# NAT protocols (nf_nat) 27# NAT protocols (nf_nat)
28obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o 28obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
29 29
30obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
31obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
32obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
33obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
34obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
35
30# generic IP tables 36# generic IP tables
31obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o 37obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
32 38
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index a865f6f94013..802ddecb30b8 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -27,13 +27,14 @@ static const struct xt_table packet_filter = {
27 27
28/* The work comes in here from netfilter.c */ 28/* The work comes in here from netfilter.c */
29static unsigned int 29static unsigned int
30arptable_filter_hook(unsigned int hook, struct sk_buff *skb, 30arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
31 const struct net_device *in, const struct net_device *out, 31 const struct net_device *in, const struct net_device *out,
32 int (*okfn)(struct sk_buff *)) 32 int (*okfn)(struct sk_buff *))
33{ 33{
34 const struct net *net = dev_net((in != NULL) ? in : out); 34 const struct net *net = dev_net((in != NULL) ? in : out);
35 35
36 return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter); 36 return arpt_do_table(skb, ops->hooknum, in, out,
37 net->ipv4.arptable_filter);
37} 38}
38 39
39static struct nf_hook_ops *arpfilter_ops __read_mostly; 40static struct nf_hook_ops *arpfilter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0b732efd32e2..a2e2b61cd7da 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -483,7 +483,7 @@ static void arp_print(struct arp_payload *payload)
483#endif 483#endif
484 484
485static unsigned int 485static unsigned int
486arp_mangle(unsigned int hook, 486arp_mangle(const struct nf_hook_ops *ops,
487 struct sk_buff *skb, 487 struct sk_buff *skb,
488 const struct net_device *in, 488 const struct net_device *in,
489 const struct net_device *out, 489 const struct net_device *out,
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index b6346bf2fde3..01cffeaa0085 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -297,7 +297,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
297 return XT_CONTINUE; 297 return XT_CONTINUE;
298} 298}
299 299
300static unsigned int ipv4_synproxy_hook(unsigned int hooknum, 300static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
301 struct sk_buff *skb, 301 struct sk_buff *skb,
302 const struct net_device *in, 302 const struct net_device *in,
303 const struct net_device *out, 303 const struct net_device *out,
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 50af5b45c050..e08a74a243a8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -33,20 +33,21 @@ static const struct xt_table packet_filter = {
33}; 33};
34 34
35static unsigned int 35static unsigned int
36iptable_filter_hook(unsigned int hook, struct sk_buff *skb, 36iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
37 const struct net_device *in, const struct net_device *out, 37 const struct net_device *in, const struct net_device *out,
38 int (*okfn)(struct sk_buff *)) 38 int (*okfn)(struct sk_buff *))
39{ 39{
40 const struct net *net; 40 const struct net *net;
41 41
42 if (hook == NF_INET_LOCAL_OUT && 42 if (ops->hooknum == NF_INET_LOCAL_OUT &&
43 (skb->len < sizeof(struct iphdr) || 43 (skb->len < sizeof(struct iphdr) ||
44 ip_hdrlen(skb) < sizeof(struct iphdr))) 44 ip_hdrlen(skb) < sizeof(struct iphdr)))
45 /* root is playing with raw sockets. */ 45 /* root is playing with raw sockets. */
46 return NF_ACCEPT; 46 return NF_ACCEPT;
47 47
48 net = dev_net((in != NULL) ? in : out); 48 net = dev_net((in != NULL) ? in : out);
49 return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter); 49 return ipt_do_table(skb, ops->hooknum, in, out,
50 net->ipv4.iptable_filter);
50} 51}
51 52
52static struct nf_hook_ops *filter_ops __read_mostly; 53static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 0d8cd82e0fad..6a5079c34bb3 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -79,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
79 79
80/* The work comes in here from netfilter.c. */ 80/* The work comes in here from netfilter.c. */
81static unsigned int 81static unsigned int
82iptable_mangle_hook(unsigned int hook, 82iptable_mangle_hook(const struct nf_hook_ops *ops,
83 struct sk_buff *skb, 83 struct sk_buff *skb,
84 const struct net_device *in, 84 const struct net_device *in,
85 const struct net_device *out, 85 const struct net_device *out,
86 int (*okfn)(struct sk_buff *)) 86 int (*okfn)(struct sk_buff *))
87{ 87{
88 if (hook == NF_INET_LOCAL_OUT) 88 if (ops->hooknum == NF_INET_LOCAL_OUT)
89 return ipt_mangle_out(skb, out); 89 return ipt_mangle_out(skb, out);
90 if (hook == NF_INET_POST_ROUTING) 90 if (ops->hooknum == NF_INET_POST_ROUTING)
91 return ipt_do_table(skb, hook, in, out, 91 return ipt_do_table(skb, ops->hooknum, in, out,
92 dev_net(out)->ipv4.iptable_mangle); 92 dev_net(out)->ipv4.iptable_mangle);
93 /* PREROUTING/INPUT/FORWARD: */ 93 /* PREROUTING/INPUT/FORWARD: */
94 return ipt_do_table(skb, hook, in, out, 94 return ipt_do_table(skb, ops->hooknum, in, out,
95 dev_net(in)->ipv4.iptable_mangle); 95 dev_net(in)->ipv4.iptable_mangle);
96} 96}
97 97
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 683bfaffed65..ee2886126e3d 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
61} 61}
62 62
63static unsigned int 63static unsigned int
64nf_nat_ipv4_fn(unsigned int hooknum, 64nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
65 struct sk_buff *skb, 65 struct sk_buff *skb,
66 const struct net_device *in, 66 const struct net_device *in,
67 const struct net_device *out, 67 const struct net_device *out,
@@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
71 enum ip_conntrack_info ctinfo; 71 enum ip_conntrack_info ctinfo;
72 struct nf_conn_nat *nat; 72 struct nf_conn_nat *nat;
73 /* maniptype == SRC for postrouting. */ 73 /* maniptype == SRC for postrouting. */
74 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); 74 enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
75 75
76 /* We never see fragments: conntrack defrags on pre-routing 76 /* We never see fragments: conntrack defrags on pre-routing
77 * and local-out, and nf_nat_out protects post-routing. 77 * and local-out, and nf_nat_out protects post-routing.
@@ -108,7 +108,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
108 case IP_CT_RELATED_REPLY: 108 case IP_CT_RELATED_REPLY:
109 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { 109 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
110 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 110 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
111 hooknum)) 111 ops->hooknum))
112 return NF_DROP; 112 return NF_DROP;
113 else 113 else
114 return NF_ACCEPT; 114 return NF_ACCEPT;
@@ -121,14 +121,14 @@ nf_nat_ipv4_fn(unsigned int hooknum,
121 if (!nf_nat_initialized(ct, maniptype)) { 121 if (!nf_nat_initialized(ct, maniptype)) {
122 unsigned int ret; 122 unsigned int ret;
123 123
124 ret = nf_nat_rule_find(skb, hooknum, in, out, ct); 124 ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
125 if (ret != NF_ACCEPT) 125 if (ret != NF_ACCEPT)
126 return ret; 126 return ret;
127 } else { 127 } else {
128 pr_debug("Already setup manip %s for ct %p\n", 128 pr_debug("Already setup manip %s for ct %p\n",
129 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 129 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
130 ct); 130 ct);
131 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) 131 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
132 goto oif_changed; 132 goto oif_changed;
133 } 133 }
134 break; 134 break;
@@ -137,11 +137,11 @@ nf_nat_ipv4_fn(unsigned int hooknum,
137 /* ESTABLISHED */ 137 /* ESTABLISHED */
138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 138 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
139 ctinfo == IP_CT_ESTABLISHED_REPLY); 139 ctinfo == IP_CT_ESTABLISHED_REPLY);
140 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) 140 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
141 goto oif_changed; 141 goto oif_changed;
142 } 142 }
143 143
144 return nf_nat_packet(ct, ctinfo, hooknum, skb); 144 return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
145 145
146oif_changed: 146oif_changed:
147 nf_ct_kill_acct(ct, ctinfo, skb); 147 nf_ct_kill_acct(ct, ctinfo, skb);
@@ -149,7 +149,7 @@ oif_changed:
149} 149}
150 150
151static unsigned int 151static unsigned int
152nf_nat_ipv4_in(unsigned int hooknum, 152nf_nat_ipv4_in(const struct nf_hook_ops *ops,
153 struct sk_buff *skb, 153 struct sk_buff *skb,
154 const struct net_device *in, 154 const struct net_device *in,
155 const struct net_device *out, 155 const struct net_device *out,
@@ -158,7 +158,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
158 unsigned int ret; 158 unsigned int ret;
159 __be32 daddr = ip_hdr(skb)->daddr; 159 __be32 daddr = ip_hdr(skb)->daddr;
160 160
161 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); 161 ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
162 if (ret != NF_DROP && ret != NF_STOLEN && 162 if (ret != NF_DROP && ret != NF_STOLEN &&
163 daddr != ip_hdr(skb)->daddr) 163 daddr != ip_hdr(skb)->daddr)
164 skb_dst_drop(skb); 164 skb_dst_drop(skb);
@@ -167,7 +167,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
167} 167}
168 168
169static unsigned int 169static unsigned int
170nf_nat_ipv4_out(unsigned int hooknum, 170nf_nat_ipv4_out(const struct nf_hook_ops *ops,
171 struct sk_buff *skb, 171 struct sk_buff *skb,
172 const struct net_device *in, 172 const struct net_device *in,
173 const struct net_device *out, 173 const struct net_device *out,
@@ -185,7 +185,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
185 ip_hdrlen(skb) < sizeof(struct iphdr)) 185 ip_hdrlen(skb) < sizeof(struct iphdr))
186 return NF_ACCEPT; 186 return NF_ACCEPT;
187 187
188 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); 188 ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
189#ifdef CONFIG_XFRM 189#ifdef CONFIG_XFRM
190 if (ret != NF_DROP && ret != NF_STOLEN && 190 if (ret != NF_DROP && ret != NF_STOLEN &&
191 !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 191 !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -207,7 +207,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
207} 207}
208 208
209static unsigned int 209static unsigned int
210nf_nat_ipv4_local_fn(unsigned int hooknum, 210nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
211 struct sk_buff *skb, 211 struct sk_buff *skb,
212 const struct net_device *in, 212 const struct net_device *in,
213 const struct net_device *out, 213 const struct net_device *out,
@@ -223,7 +223,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
223 ip_hdrlen(skb) < sizeof(struct iphdr)) 223 ip_hdrlen(skb) < sizeof(struct iphdr))
224 return NF_ACCEPT; 224 return NF_ACCEPT;
225 225
226 ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn); 226 ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
227 if (ret != NF_DROP && ret != NF_STOLEN && 227 if (ret != NF_DROP && ret != NF_STOLEN &&
228 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 228 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
229 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 229 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 1f82aea11df6..b2f7e8f98316 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -20,20 +20,20 @@ static const struct xt_table packet_raw = {
20 20
21/* The work comes in here from netfilter.c. */ 21/* The work comes in here from netfilter.c. */
22static unsigned int 22static unsigned int
23iptable_raw_hook(unsigned int hook, struct sk_buff *skb, 23iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
24 const struct net_device *in, const struct net_device *out, 24 const struct net_device *in, const struct net_device *out,
25 int (*okfn)(struct sk_buff *)) 25 int (*okfn)(struct sk_buff *))
26{ 26{
27 const struct net *net; 27 const struct net *net;
28 28
29 if (hook == NF_INET_LOCAL_OUT && 29 if (ops->hooknum == NF_INET_LOCAL_OUT &&
30 (skb->len < sizeof(struct iphdr) || 30 (skb->len < sizeof(struct iphdr) ||
31 ip_hdrlen(skb) < sizeof(struct iphdr))) 31 ip_hdrlen(skb) < sizeof(struct iphdr)))
32 /* root is playing with raw sockets. */ 32 /* root is playing with raw sockets. */
33 return NF_ACCEPT; 33 return NF_ACCEPT;
34 34
35 net = dev_net((in != NULL) ? in : out); 35 net = dev_net((in != NULL) ? in : out);
36 return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw); 36 return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
37} 37}
38 38
39static struct nf_hook_ops *rawtable_ops __read_mostly; 39static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index f867a8d38bf7..c86647ed2078 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -37,21 +37,22 @@ static const struct xt_table security_table = {
37}; 37};
38 38
39static unsigned int 39static unsigned int
40iptable_security_hook(unsigned int hook, struct sk_buff *skb, 40iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
41 const struct net_device *in, 41 const struct net_device *in,
42 const struct net_device *out, 42 const struct net_device *out,
43 int (*okfn)(struct sk_buff *)) 43 int (*okfn)(struct sk_buff *))
44{ 44{
45 const struct net *net; 45 const struct net *net;
46 46
47 if (hook == NF_INET_LOCAL_OUT && 47 if (ops->hooknum == NF_INET_LOCAL_OUT &&
48 (skb->len < sizeof(struct iphdr) || 48 (skb->len < sizeof(struct iphdr) ||
49 ip_hdrlen(skb) < sizeof(struct iphdr))) 49 ip_hdrlen(skb) < sizeof(struct iphdr)))
50 /* Somebody is playing with raw sockets. */ 50 /* Somebody is playing with raw sockets. */
51 return NF_ACCEPT; 51 return NF_ACCEPT;
52 52
53 net = dev_net((in != NULL) ? in : out); 53 net = dev_net((in != NULL) ? in : out);
54 return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security); 54 return ipt_do_table(skb, ops->hooknum, in, out,
55 net->ipv4.iptable_security);
55} 56}
56 57
57static struct nf_hook_ops *sectbl_ops __read_mostly; 58static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 86f5b34a4ed1..ecd8bec411c9 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
92 return NF_ACCEPT; 92 return NF_ACCEPT;
93} 93}
94 94
95static unsigned int ipv4_helper(unsigned int hooknum, 95static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
96 struct sk_buff *skb, 96 struct sk_buff *skb,
97 const struct net_device *in, 97 const struct net_device *in,
98 const struct net_device *out, 98 const struct net_device *out,
@@ -121,7 +121,7 @@ static unsigned int ipv4_helper(unsigned int hooknum,
121 ct, ctinfo); 121 ct, ctinfo);
122} 122}
123 123
124static unsigned int ipv4_confirm(unsigned int hooknum, 124static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
125 struct sk_buff *skb, 125 struct sk_buff *skb,
126 const struct net_device *in, 126 const struct net_device *in,
127 const struct net_device *out, 127 const struct net_device *out,
@@ -147,16 +147,16 @@ out:
147 return nf_conntrack_confirm(skb); 147 return nf_conntrack_confirm(skb);
148} 148}
149 149
150static unsigned int ipv4_conntrack_in(unsigned int hooknum, 150static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
151 struct sk_buff *skb, 151 struct sk_buff *skb,
152 const struct net_device *in, 152 const struct net_device *in,
153 const struct net_device *out, 153 const struct net_device *out,
154 int (*okfn)(struct sk_buff *)) 154 int (*okfn)(struct sk_buff *))
155{ 155{
156 return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); 156 return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
157} 157}
158 158
159static unsigned int ipv4_conntrack_local(unsigned int hooknum, 159static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
160 struct sk_buff *skb, 160 struct sk_buff *skb,
161 const struct net_device *in, 161 const struct net_device *in,
162 const struct net_device *out, 162 const struct net_device *out,
@@ -166,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
166 if (skb->len < sizeof(struct iphdr) || 166 if (skb->len < sizeof(struct iphdr) ||
167 ip_hdrlen(skb) < sizeof(struct iphdr)) 167 ip_hdrlen(skb) < sizeof(struct iphdr))
168 return NF_ACCEPT; 168 return NF_ACCEPT;
169 return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); 169 return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
170} 170}
171 171
172/* Connection tracking may drop packets, but never alters them, so 172/* Connection tracking may drop packets, but never alters them, so
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 742815518b0f..12e13bd82b5b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -60,7 +60,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
60 return IP_DEFRAG_CONNTRACK_OUT + zone; 60 return IP_DEFRAG_CONNTRACK_OUT + zone;
61} 61}
62 62
63static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, 63static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
64 struct sk_buff *skb, 64 struct sk_buff *skb,
65 const struct net_device *in, 65 const struct net_device *in,
66 const struct net_device *out, 66 const struct net_device *out,
@@ -83,7 +83,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
83#endif 83#endif
84 /* Gather fragments. */ 84 /* Gather fragments. */
85 if (ip_is_fragment(ip_hdr(skb))) { 85 if (ip_is_fragment(ip_hdr(skb))) {
86 enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb); 86 enum ip_defrag_users user =
87 nf_ct_defrag_user(ops->hooknum, skb);
88
87 if (nf_ct_ipv4_gather_frags(skb, user)) 89 if (nf_ct_ipv4_gather_frags(skb, user))
88 return NF_STOLEN; 90 return NF_STOLEN;
89 } 91 }
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
new file mode 100644
index 000000000000..3e67ef1c676f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -0,0 +1,102 @@
1/*
2 * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/netfilter_arp.h>
15#include <net/netfilter/nf_tables.h>
16
17static struct nft_af_info nft_af_arp __read_mostly = {
18 .family = NFPROTO_ARP,
19 .nhooks = NF_ARP_NUMHOOKS,
20 .owner = THIS_MODULE,
21};
22
23static int nf_tables_arp_init_net(struct net *net)
24{
25 net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
 26	if (net->nft.arp == NULL)
27 return -ENOMEM;
28
29 memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
30
31 if (nft_register_afinfo(net, net->nft.arp) < 0)
32 goto err;
33
34 return 0;
35err:
36 kfree(net->nft.arp);
37 return -ENOMEM;
38}
39
40static void nf_tables_arp_exit_net(struct net *net)
41{
42 nft_unregister_afinfo(net->nft.arp);
43 kfree(net->nft.arp);
44}
45
46static struct pernet_operations nf_tables_arp_net_ops = {
47 .init = nf_tables_arp_init_net,
48 .exit = nf_tables_arp_exit_net,
49};
50
51static unsigned int
52nft_do_chain_arp(const struct nf_hook_ops *ops,
53 struct sk_buff *skb,
54 const struct net_device *in,
55 const struct net_device *out,
56 int (*okfn)(struct sk_buff *))
57{
58 struct nft_pktinfo pkt;
59
60 nft_set_pktinfo(&pkt, ops, skb, in, out);
61
62 return nft_do_chain_pktinfo(&pkt, ops);
63}
64
65static struct nf_chain_type filter_arp = {
66 .family = NFPROTO_ARP,
67 .name = "filter",
68 .type = NFT_CHAIN_T_DEFAULT,
69 .hook_mask = (1 << NF_ARP_IN) |
70 (1 << NF_ARP_OUT) |
71 (1 << NF_ARP_FORWARD),
72 .fn = {
73 [NF_ARP_IN] = nft_do_chain_arp,
74 [NF_ARP_OUT] = nft_do_chain_arp,
75 [NF_ARP_FORWARD] = nft_do_chain_arp,
76 },
77};
78
79static int __init nf_tables_arp_init(void)
80{
81 int ret;
82
83 nft_register_chain_type(&filter_arp);
84 ret = register_pernet_subsys(&nf_tables_arp_net_ops);
85 if (ret < 0)
86 nft_unregister_chain_type(&filter_arp);
87
88 return ret;
89}
90
91static void __exit nf_tables_arp_exit(void)
92{
93 unregister_pernet_subsys(&nf_tables_arp_net_ops);
94 nft_unregister_chain_type(&filter_arp);
95}
96
97module_init(nf_tables_arp_init);
98module_exit(nf_tables_arp_exit);
99
100MODULE_LICENSE("GPL");
101MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
102MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 000000000000..8f7536be1322
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,128 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
10 */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/ip.h>
15#include <linux/netfilter_ipv4.h>
16#include <net/netfilter/nf_tables.h>
17#include <net/net_namespace.h>
18#include <net/ip.h>
19#include <net/net_namespace.h>
20#include <net/netfilter/nf_tables_ipv4.h>
21
22static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
23 struct sk_buff *skb,
24 const struct net_device *in,
25 const struct net_device *out,
26 int (*okfn)(struct sk_buff *))
27{
28 struct nft_pktinfo pkt;
29
30 if (unlikely(skb->len < sizeof(struct iphdr) ||
31 ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
32 if (net_ratelimit())
33 pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
34 "packet\n");
35 return NF_ACCEPT;
36 }
37 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
38
39 return nft_do_chain_pktinfo(&pkt, ops);
40}
41
42static struct nft_af_info nft_af_ipv4 __read_mostly = {
43 .family = NFPROTO_IPV4,
44 .nhooks = NF_INET_NUMHOOKS,
45 .owner = THIS_MODULE,
46 .hooks = {
47 [NF_INET_LOCAL_OUT] = nft_ipv4_output,
48 },
49};
50
51static int nf_tables_ipv4_init_net(struct net *net)
52{
53 net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
54 if (net->nft.ipv4 == NULL)
55 return -ENOMEM;
56
57 memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
58
59 if (nft_register_afinfo(net, net->nft.ipv4) < 0)
60 goto err;
61
62 return 0;
63err:
64 kfree(net->nft.ipv4);
65 return -ENOMEM;
66}
67
68static void nf_tables_ipv4_exit_net(struct net *net)
69{
70 nft_unregister_afinfo(net->nft.ipv4);
71 kfree(net->nft.ipv4);
72}
73
74static struct pernet_operations nf_tables_ipv4_net_ops = {
75 .init = nf_tables_ipv4_init_net,
76 .exit = nf_tables_ipv4_exit_net,
77};
78
79static unsigned int
80nft_do_chain_ipv4(const struct nf_hook_ops *ops,
81 struct sk_buff *skb,
82 const struct net_device *in,
83 const struct net_device *out,
84 int (*okfn)(struct sk_buff *))
85{
86 struct nft_pktinfo pkt;
87
88 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
89
90 return nft_do_chain_pktinfo(&pkt, ops);
91}
92
93static struct nf_chain_type filter_ipv4 = {
94 .family = NFPROTO_IPV4,
95 .name = "filter",
96 .type = NFT_CHAIN_T_DEFAULT,
97 .hook_mask = (1 << NF_INET_LOCAL_IN) |
98 (1 << NF_INET_LOCAL_OUT) |
99 (1 << NF_INET_FORWARD) |
100 (1 << NF_INET_PRE_ROUTING) |
101 (1 << NF_INET_POST_ROUTING),
102 .fn = {
103 [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
104 [NF_INET_LOCAL_OUT] = nft_ipv4_output,
105 [NF_INET_FORWARD] = nft_do_chain_ipv4,
106 [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
107 [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
108 },
109};
110
111static int __init nf_tables_ipv4_init(void)
112{
113 nft_register_chain_type(&filter_ipv4);
114 return register_pernet_subsys(&nf_tables_ipv4_net_ops);
115}
116
117static void __exit nf_tables_ipv4_exit(void)
118{
119 unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
120 nft_unregister_chain_type(&filter_ipv4);
121}
122
123module_init(nf_tables_ipv4_init);
124module_exit(nf_tables_ipv4_exit);
125
126MODULE_LICENSE("GPL");
127MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
128MODULE_ALIAS_NFT_FAMILY(AF_INET);
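
nft_ipv4_output() above refuses to classify packets injected through raw sockets that cannot even hold an IPv4 header; note that ihl counts 32-bit words, so the comparison against sizeof(struct iphdr) / 4 means "fewer than five words". A small user-space restatement of that check (constants and names are illustrative only):

/* User-space restatement of the short-packet check in nft_ipv4_output():
 * lengths are in bytes, ihl in 32-bit words, as in the IPv4 header.
 */
#include <stdbool.h>
#include <stdio.h>

#define MIN_IPV4_HDR_LEN 20u	/* sizeof(struct iphdr) */

static bool ipv4_header_plausible(unsigned int pkt_len, unsigned int ihl)
{
	/* The buffer must cover at least a 20-byte header, and the
	 * header-length field must claim at least 5 words (5 * 4 = 20).
	 */
	return pkt_len >= MIN_IPV4_HDR_LEN && ihl >= MIN_IPV4_HDR_LEN / 4;
}

int main(void)
{
	printf("%d\n", ipv4_header_plausible(40, 5));	/* 1: plausible header */
	printf("%d\n", ipv4_header_plausible(40, 3));	/* 0: ihl too small */
	return 0;
}
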
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
new file mode 100644
index 000000000000..cf2c792cd971
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -0,0 +1,205 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
4 * Copyright (c) 2012 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * Development of this code funded by Astaro AG (http://www.astaro.com/)
11 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/list.h>
16#include <linux/skbuff.h>
17#include <linux/ip.h>
18#include <linux/netfilter.h>
19#include <linux/netfilter_ipv4.h>
20#include <linux/netfilter/nf_tables.h>
21#include <net/netfilter/nf_conntrack.h>
22#include <net/netfilter/nf_nat.h>
23#include <net/netfilter/nf_nat_core.h>
24#include <net/netfilter/nf_tables.h>
25#include <net/netfilter/nf_tables_ipv4.h>
26#include <net/netfilter/nf_nat_l3proto.h>
27#include <net/ip.h>
28
29/*
30 * NAT chains
31 */
32
33static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
34 struct sk_buff *skb,
35 const struct net_device *in,
36 const struct net_device *out,
37 int (*okfn)(struct sk_buff *))
38{
39 enum ip_conntrack_info ctinfo;
40 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
41 struct nf_conn_nat *nat;
42 enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
43 struct nft_pktinfo pkt;
44 unsigned int ret;
45
46 if (ct == NULL || nf_ct_is_untracked(ct))
47 return NF_ACCEPT;
48
49 NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
50
51 nat = nfct_nat(ct);
52 if (nat == NULL) {
53 /* Conntrack module was loaded late, can't add extension. */
54 if (nf_ct_is_confirmed(ct))
55 return NF_ACCEPT;
56 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
57 if (nat == NULL)
58 return NF_ACCEPT;
59 }
60
61 switch (ctinfo) {
62 case IP_CT_RELATED:
63 case IP_CT_RELATED + IP_CT_IS_REPLY:
64 if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
65 if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
66 ops->hooknum))
67 return NF_DROP;
68 else
69 return NF_ACCEPT;
70 }
71 /* Fall through */
72 case IP_CT_NEW:
73 if (nf_nat_initialized(ct, maniptype))
74 break;
75
76 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
77
78 ret = nft_do_chain_pktinfo(&pkt, ops);
79 if (ret != NF_ACCEPT)
80 return ret;
81 if (!nf_nat_initialized(ct, maniptype)) {
82 ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
83 if (ret != NF_ACCEPT)
84 return ret;
85 }
86 default:
87 break;
88 }
89
90 return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
91}
92
93static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
94 struct sk_buff *skb,
95 const struct net_device *in,
96 const struct net_device *out,
97 int (*okfn)(struct sk_buff *))
98{
99 __be32 daddr = ip_hdr(skb)->daddr;
100 unsigned int ret;
101
102 ret = nf_nat_fn(ops, skb, in, out, okfn);
103 if (ret != NF_DROP && ret != NF_STOLEN &&
104 ip_hdr(skb)->daddr != daddr) {
105 skb_dst_drop(skb);
106 }
107 return ret;
108}
109
110static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
111 struct sk_buff *skb,
112 const struct net_device *in,
113 const struct net_device *out,
114 int (*okfn)(struct sk_buff *))
115{
116 enum ip_conntrack_info ctinfo __maybe_unused;
117 const struct nf_conn *ct __maybe_unused;
118 unsigned int ret;
119
120 ret = nf_nat_fn(ops, skb, in, out, okfn);
121#ifdef CONFIG_XFRM
122 if (ret != NF_DROP && ret != NF_STOLEN &&
123 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
124 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
125
126 if (ct->tuplehash[dir].tuple.src.u3.ip !=
127 ct->tuplehash[!dir].tuple.dst.u3.ip ||
128 ct->tuplehash[dir].tuple.src.u.all !=
129 ct->tuplehash[!dir].tuple.dst.u.all)
130 return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
131 ret : NF_DROP;
132 }
133#endif
134 return ret;
135}
136
137static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
138 struct sk_buff *skb,
139 const struct net_device *in,
140 const struct net_device *out,
141 int (*okfn)(struct sk_buff *))
142{
143 enum ip_conntrack_info ctinfo;
144 const struct nf_conn *ct;
145 unsigned int ret;
146
147 ret = nf_nat_fn(ops, skb, in, out, okfn);
148 if (ret != NF_DROP && ret != NF_STOLEN &&
149 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
150 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
151
152 if (ct->tuplehash[dir].tuple.dst.u3.ip !=
153 ct->tuplehash[!dir].tuple.src.u3.ip) {
154 if (ip_route_me_harder(skb, RTN_UNSPEC))
155 ret = NF_DROP;
156 }
157#ifdef CONFIG_XFRM
158 else if (ct->tuplehash[dir].tuple.dst.u.all !=
159 ct->tuplehash[!dir].tuple.src.u.all)
160 if (nf_xfrm_me_harder(skb, AF_INET))
161 ret = NF_DROP;
162#endif
163 }
164 return ret;
165}
166
167static struct nf_chain_type nft_chain_nat_ipv4 = {
168 .family = NFPROTO_IPV4,
169 .name = "nat",
170 .type = NFT_CHAIN_T_NAT,
171 .hook_mask = (1 << NF_INET_PRE_ROUTING) |
172 (1 << NF_INET_POST_ROUTING) |
173 (1 << NF_INET_LOCAL_OUT) |
174 (1 << NF_INET_LOCAL_IN),
175 .fn = {
176 [NF_INET_PRE_ROUTING] = nf_nat_prerouting,
177 [NF_INET_POST_ROUTING] = nf_nat_postrouting,
178 [NF_INET_LOCAL_OUT] = nf_nat_output,
179 [NF_INET_LOCAL_IN] = nf_nat_fn,
180 },
181 .me = THIS_MODULE,
182};
183
184static int __init nft_chain_nat_init(void)
185{
186 int err;
187
188 err = nft_register_chain_type(&nft_chain_nat_ipv4);
189 if (err < 0)
190 return err;
191
192 return 0;
193}
194
195static void __exit nft_chain_nat_exit(void)
196{
197 nft_unregister_chain_type(&nft_chain_nat_ipv4);
198}
199
200module_init(nft_chain_nat_init);
201module_exit(nft_chain_nat_exit);
202
203MODULE_LICENSE("GPL");
204MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
205MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
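
In nf_nat_fn() above, a new or related connection is run through the nat base chain; if the ruleset accepts the packet without setting up a translation, nf_nat_alloc_null_binding() is used so the connection still gets a unique reply tuple, and everything else goes straight to nf_nat_packet(). A compressed stand-alone sketch of that decision flow, with plain booleans instead of conntrack state and purely hypothetical helpers:

/* Stand-alone sketch of the decision flow in nf_nat_fn(); the struct
 * and helpers are hypothetical stand-ins, not kernel APIs.
 */
#include <stdbool.h>
#include <stdio.h>

struct flow {
	bool is_new;		/* NEW or RELATED conntrack state */
	bool has_binding;	/* a NAT binding exists for this manip type */
};

/* stands in for running the nat base chain via nft_do_chain_pktinfo() */
static bool run_nat_chain(struct flow *f)
{
	(void)f;
	return true;		/* rules accepted, perhaps without snat/dnat */
}

/* stands in for nf_nat_alloc_null_binding(): no rule picked an address,
 * but the connection still needs a unique reply tuple.
 */
static void alloc_null_binding(struct flow *f)
{
	f->has_binding = true;
}

static bool nat_hook(struct flow *f)
{
	if (f->is_new && !f->has_binding) {
		if (!run_nat_chain(f))
			return false;		/* drop */
		if (!f->has_binding)
			alloc_null_binding(f);
	}
	/* translate the packet (nf_nat_packet()) for both directions */
	return true;
}

int main(void)
{
	struct flow f = { .is_new = true, .has_binding = false };

	printf("verdict: %s\n", nat_hook(&f) ? "accept" : "drop");
	return 0;
}
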
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
new file mode 100644
index 000000000000..4e6bf9a3d7aa
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -0,0 +1,90 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/list.h>
13#include <linux/skbuff.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter_ipv4.h>
17#include <linux/netfilter/nfnetlink.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables.h>
20#include <net/netfilter/nf_tables_ipv4.h>
21#include <net/route.h>
22#include <net/ip.h>
23
24static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
25 struct sk_buff *skb,
26 const struct net_device *in,
27 const struct net_device *out,
28 int (*okfn)(struct sk_buff *))
29{
30 unsigned int ret;
31 struct nft_pktinfo pkt;
32 u32 mark;
33 __be32 saddr, daddr;
34 u_int8_t tos;
35 const struct iphdr *iph;
36
37 /* root is playing with raw sockets. */
38 if (skb->len < sizeof(struct iphdr) ||
39 ip_hdrlen(skb) < sizeof(struct iphdr))
40 return NF_ACCEPT;
41
42 nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
43
44 mark = skb->mark;
45 iph = ip_hdr(skb);
46 saddr = iph->saddr;
47 daddr = iph->daddr;
48 tos = iph->tos;
49
50 ret = nft_do_chain_pktinfo(&pkt, ops);
51 if (ret != NF_DROP && ret != NF_QUEUE) {
52 iph = ip_hdr(skb);
53
54 if (iph->saddr != saddr ||
55 iph->daddr != daddr ||
56 skb->mark != mark ||
57 iph->tos != tos)
58 if (ip_route_me_harder(skb, RTN_UNSPEC))
59 ret = NF_DROP;
60 }
61 return ret;
62}
63
64static struct nf_chain_type nft_chain_route_ipv4 = {
65 .family = NFPROTO_IPV4,
66 .name = "route",
67 .type = NFT_CHAIN_T_ROUTE,
68 .hook_mask = (1 << NF_INET_LOCAL_OUT),
69 .fn = {
70 [NF_INET_LOCAL_OUT] = nf_route_table_hook,
71 },
72 .me = THIS_MODULE,
73};
74
75static int __init nft_chain_route_init(void)
76{
77 return nft_register_chain_type(&nft_chain_route_ipv4);
78}
79
80static void __exit nft_chain_route_exit(void)
81{
82 nft_unregister_chain_type(&nft_chain_route_ipv4);
83}
84
85module_init(nft_chain_route_init);
86module_exit(nft_chain_route_exit);
87
88MODULE_LICENSE("GPL");
89MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
90MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
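
The route chain above snapshots the fields that can influence route selection (source and destination address, TOS, mark) before running the ruleset and re-routes the packet only when one of them was rewritten, mirroring what the iptables mangle table does on LOCAL_OUT. A stand-alone sketch of that snapshot-and-compare pattern, with hypothetical types and helpers:

/* Stand-alone sketch of the snapshot-and-compare pattern used by
 * nf_route_table_hook(); struct route_key and the helpers below are
 * hypothetical stand-ins, not kernel APIs.
 */
#include <stdbool.h>
#include <stdint.h>

struct route_key {
	uint32_t saddr;
	uint32_t daddr;
	uint32_t mark;
	uint8_t  tos;
};

/* stands in for nft_do_chain_pktinfo(); rules may rewrite *k */
static bool run_chain(struct route_key *k)
{
	(void)k;
	return true;		/* accept */
}

/* stands in for ip_route_me_harder() */
static bool reroute(const struct route_key *k)
{
	(void)k;
	return true;
}

static bool route_chain_hook(struct route_key *pkt)
{
	struct route_key before = *pkt;

	if (!run_chain(pkt))
		return false;			/* drop */

	/* Re-route only if a routing-relevant field was rewritten. */
	if ((before.saddr != pkt->saddr || before.daddr != pkt->daddr ||
	     before.mark != pkt->mark || before.tos != pkt->tos) &&
	    !reroute(pkt))
		return false;			/* drop */

	return true;				/* accept */
}

int main(void)
{
	struct route_key pkt = { .saddr = 0x0a000001, .daddr = 0x0a000002,
				 .mark = 0, .tos = 0 };

	return route_chain_hook(&pkt) ? 0 : 1;
}
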
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..fff5ba1a33b7
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,123 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables.h>
18#include <net/icmp.h>
19
20struct nft_reject {
21 enum nft_reject_types type:8;
22 u8 icmp_code;
23};
24
25static void nft_reject_eval(const struct nft_expr *expr,
26 struct nft_data data[NFT_REG_MAX + 1],
27 const struct nft_pktinfo *pkt)
28{
29 struct nft_reject *priv = nft_expr_priv(expr);
30
31 switch (priv->type) {
32 case NFT_REJECT_ICMP_UNREACH:
33 icmp_send(pkt->skb, ICMP_DEST_UNREACH, priv->icmp_code, 0);
34 break;
35 case NFT_REJECT_TCP_RST:
36 break;
37 }
38
39 data[NFT_REG_VERDICT].verdict = NF_DROP;
40}
41
42static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
43 [NFTA_REJECT_TYPE] = { .type = NLA_U32 },
44 [NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 },
45};
46
47static int nft_reject_init(const struct nft_ctx *ctx,
48 const struct nft_expr *expr,
49 const struct nlattr * const tb[])
50{
51 struct nft_reject *priv = nft_expr_priv(expr);
52
53 if (tb[NFTA_REJECT_TYPE] == NULL)
54 return -EINVAL;
55
56 priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
57 switch (priv->type) {
58 case NFT_REJECT_ICMP_UNREACH:
59 if (tb[NFTA_REJECT_ICMP_CODE] == NULL)
60 return -EINVAL;
61 priv->icmp_code = nla_get_u8(tb[NFTA_REJECT_ICMP_CODE]);
62 case NFT_REJECT_TCP_RST:
63 break;
64 default:
65 return -EINVAL;
66 }
67
68 return 0;
69}
70
71static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
72{
73 const struct nft_reject *priv = nft_expr_priv(expr);
74
75 if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type))
76 goto nla_put_failure;
77
78 switch (priv->type) {
79 case NFT_REJECT_ICMP_UNREACH:
80 if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code))
81 goto nla_put_failure;
82 break;
83 }
84
85 return 0;
86
87nla_put_failure:
88 return -1;
89}
90
91static struct nft_expr_type nft_reject_type;
92static const struct nft_expr_ops nft_reject_ops = {
93 .type = &nft_reject_type,
94 .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
95 .eval = nft_reject_eval,
96 .init = nft_reject_init,
97 .dump = nft_reject_dump,
98};
99
100static struct nft_expr_type nft_reject_type __read_mostly = {
101 .name = "reject",
102 .ops = &nft_reject_ops,
103 .policy = nft_reject_policy,
104 .maxattr = NFTA_REJECT_MAX,
105 .owner = THIS_MODULE,
106};
107
108static int __init nft_reject_module_init(void)
109{
110 return nft_register_expr(&nft_reject_type);
111}
112
113static void __exit nft_reject_module_exit(void)
114{
115 nft_unregister_expr(&nft_reject_type);
116}
117
118module_init(nft_reject_module_init);
119module_exit(nft_reject_module_exit);
120
121MODULE_LICENSE("GPL");
122MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
123MODULE_ALIAS_NFT_EXPR("reject");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6011615e810d..d2d325382b13 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -295,7 +295,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
295 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " 295 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
296 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", 296 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
297 dst_entries_get_slow(&ipv4_dst_ops), 297 dst_entries_get_slow(&ipv4_dst_ops),
298 st->in_hit, 298 0, /* st->in_hit */
299 st->in_slow_tot, 299 st->in_slow_tot,
300 st->in_slow_mc, 300 st->in_slow_mc,
301 st->in_no_route, 301 st->in_no_route,
@@ -303,16 +303,16 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
303 st->in_martian_dst, 303 st->in_martian_dst,
304 st->in_martian_src, 304 st->in_martian_src,
305 305
306 st->out_hit, 306 0, /* st->out_hit */
307 st->out_slow_tot, 307 st->out_slow_tot,
308 st->out_slow_mc, 308 st->out_slow_mc,
309 309
310 st->gc_total, 310 0, /* st->gc_total */
311 st->gc_ignored, 311 0, /* st->gc_ignored */
312 st->gc_goal_miss, 312 0, /* st->gc_goal_miss */
313 st->gc_dst_overflow, 313 0, /* st->gc_dst_overflow */
314 st->in_hlist_search, 314 0, /* st->in_hlist_search */
315 st->out_hlist_search 315 0 /* st->out_hlist_search */
316 ); 316 );
317 return 0; 317 return 0;
318} 318}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index a7f842b29b67..7702f9e90a04 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -25,6 +25,19 @@ config NF_CONNTRACK_IPV6
25 25
26 To compile it as a module, choose M here. If unsure, say N. 26 To compile it as a module, choose M here. If unsure, say N.
27 27
28config NF_TABLES_IPV6
29 depends on NF_TABLES
30 tristate "IPv6 nf_tables support"
31
32config NFT_CHAIN_ROUTE_IPV6
33 depends on NF_TABLES_IPV6
34 tristate "IPv6 nf_tables route chain support"
35
36config NFT_CHAIN_NAT_IPV6
37 depends on NF_TABLES_IPV6
38 depends on NF_NAT_IPV6 && NFT_NAT
39 tristate "IPv6 nf_tables nat chain support"
40
28config IP6_NF_IPTABLES 41config IP6_NF_IPTABLES
29 tristate "IP6 tables support (required for filtering)" 42 tristate "IP6 tables support (required for filtering)"
30 depends on INET && IPV6 43 depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 2b53738f798c..d1b4928f34f7 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -23,6 +23,11 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
23nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o 23nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
24obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o 24obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
25 25
26# nf_tables
27obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
28obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
29obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
30
26# matches 31# matches
27obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 32obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
28obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o 33obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 2748b042da72..bf9f612c1bc2 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -312,7 +312,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
312 return XT_CONTINUE; 312 return XT_CONTINUE;
313} 313}
314 314
315static unsigned int ipv6_synproxy_hook(unsigned int hooknum, 315static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
316 struct sk_buff *skb, 316 struct sk_buff *skb,
317 const struct net_device *in, 317 const struct net_device *in,
318 const struct net_device *out, 318 const struct net_device *out,
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 29b44b14c5ea..ca7f6c128086 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,13 +32,14 @@ static const struct xt_table packet_filter = {
32 32
33/* The work comes in here from netfilter.c. */ 33/* The work comes in here from netfilter.c. */
34static unsigned int 34static unsigned int
35ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, 35ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
36 const struct net_device *in, const struct net_device *out, 36 const struct net_device *in, const struct net_device *out,
37 int (*okfn)(struct sk_buff *)) 37 int (*okfn)(struct sk_buff *))
38{ 38{
39 const struct net *net = dev_net((in != NULL) ? in : out); 39 const struct net *net = dev_net((in != NULL) ? in : out);
40 40
41 return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); 41 return ip6t_do_table(skb, ops->hooknum, in, out,
42 net->ipv6.ip6table_filter);
42} 43}
43 44
44static struct nf_hook_ops *filter_ops __read_mostly; 45static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c705907ae6ab..307bbb782d14 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
76 76
77/* The work comes in here from netfilter.c. */ 77/* The work comes in here from netfilter.c. */
78static unsigned int 78static unsigned int
79ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, 79ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
80 const struct net_device *in, const struct net_device *out, 80 const struct net_device *in, const struct net_device *out,
81 int (*okfn)(struct sk_buff *)) 81 int (*okfn)(struct sk_buff *))
82{ 82{
83 if (hook == NF_INET_LOCAL_OUT) 83 if (ops->hooknum == NF_INET_LOCAL_OUT)
84 return ip6t_mangle_out(skb, out); 84 return ip6t_mangle_out(skb, out);
85 if (hook == NF_INET_POST_ROUTING) 85 if (ops->hooknum == NF_INET_POST_ROUTING)
86 return ip6t_do_table(skb, hook, in, out, 86 return ip6t_do_table(skb, ops->hooknum, in, out,
87 dev_net(out)->ipv6.ip6table_mangle); 87 dev_net(out)->ipv6.ip6table_mangle);
88 /* INPUT/FORWARD */ 88 /* INPUT/FORWARD */
89 return ip6t_do_table(skb, hook, in, out, 89 return ip6t_do_table(skb, ops->hooknum, in, out,
90 dev_net(in)->ipv6.ip6table_mangle); 90 dev_net(in)->ipv6.ip6table_mangle);
91} 91}
92 92
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 9b076d2d3a7b..84c7f33d0cf8 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
63} 63}
64 64
65static unsigned int 65static unsigned int
66nf_nat_ipv6_fn(unsigned int hooknum, 66nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
67 struct sk_buff *skb, 67 struct sk_buff *skb,
68 const struct net_device *in, 68 const struct net_device *in,
69 const struct net_device *out, 69 const struct net_device *out,
@@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum,
72 struct nf_conn *ct; 72 struct nf_conn *ct;
73 enum ip_conntrack_info ctinfo; 73 enum ip_conntrack_info ctinfo;
74 struct nf_conn_nat *nat; 74 struct nf_conn_nat *nat;
75 enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); 75 enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
76 __be16 frag_off; 76 __be16 frag_off;
77 int hdrlen; 77 int hdrlen;
78 u8 nexthdr; 78 u8 nexthdr;
@@ -111,7 +111,8 @@ nf_nat_ipv6_fn(unsigned int hooknum,
111 111
112 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { 112 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
113 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, 113 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
114 hooknum, hdrlen)) 114 ops->hooknum,
115 hdrlen))
115 return NF_DROP; 116 return NF_DROP;
116 else 117 else
117 return NF_ACCEPT; 118 return NF_ACCEPT;
@@ -124,14 +125,14 @@ nf_nat_ipv6_fn(unsigned int hooknum,
124 if (!nf_nat_initialized(ct, maniptype)) { 125 if (!nf_nat_initialized(ct, maniptype)) {
125 unsigned int ret; 126 unsigned int ret;
126 127
127 ret = nf_nat_rule_find(skb, hooknum, in, out, ct); 128 ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
128 if (ret != NF_ACCEPT) 129 if (ret != NF_ACCEPT)
129 return ret; 130 return ret;
130 } else { 131 } else {
131 pr_debug("Already setup manip %s for ct %p\n", 132 pr_debug("Already setup manip %s for ct %p\n",
132 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 133 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
133 ct); 134 ct);
134 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) 135 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
135 goto oif_changed; 136 goto oif_changed;
136 } 137 }
137 break; 138 break;
@@ -140,11 +141,11 @@ nf_nat_ipv6_fn(unsigned int hooknum,
140 /* ESTABLISHED */ 141 /* ESTABLISHED */
141 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 142 NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
142 ctinfo == IP_CT_ESTABLISHED_REPLY); 143 ctinfo == IP_CT_ESTABLISHED_REPLY);
143 if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) 144 if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
144 goto oif_changed; 145 goto oif_changed;
145 } 146 }
146 147
147 return nf_nat_packet(ct, ctinfo, hooknum, skb); 148 return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
148 149
149oif_changed: 150oif_changed:
150 nf_ct_kill_acct(ct, ctinfo, skb); 151 nf_ct_kill_acct(ct, ctinfo, skb);
@@ -152,7 +153,7 @@ oif_changed:
152} 153}
153 154
154static unsigned int 155static unsigned int
155nf_nat_ipv6_in(unsigned int hooknum, 156nf_nat_ipv6_in(const struct nf_hook_ops *ops,
156 struct sk_buff *skb, 157 struct sk_buff *skb,
157 const struct net_device *in, 158 const struct net_device *in,
158 const struct net_device *out, 159 const struct net_device *out,
@@ -161,7 +162,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
161 unsigned int ret; 162 unsigned int ret;
162 struct in6_addr daddr = ipv6_hdr(skb)->daddr; 163 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
163 164
164 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); 165 ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
165 if (ret != NF_DROP && ret != NF_STOLEN && 166 if (ret != NF_DROP && ret != NF_STOLEN &&
166 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) 167 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
167 skb_dst_drop(skb); 168 skb_dst_drop(skb);
@@ -170,7 +171,7 @@ nf_nat_ipv6_in(unsigned int hooknum,
170} 171}
171 172
172static unsigned int 173static unsigned int
173nf_nat_ipv6_out(unsigned int hooknum, 174nf_nat_ipv6_out(const struct nf_hook_ops *ops,
174 struct sk_buff *skb, 175 struct sk_buff *skb,
175 const struct net_device *in, 176 const struct net_device *in,
176 const struct net_device *out, 177 const struct net_device *out,
@@ -187,7 +188,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
187 if (skb->len < sizeof(struct ipv6hdr)) 188 if (skb->len < sizeof(struct ipv6hdr))
188 return NF_ACCEPT; 189 return NF_ACCEPT;
189 190
190 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); 191 ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
191#ifdef CONFIG_XFRM 192#ifdef CONFIG_XFRM
192 if (ret != NF_DROP && ret != NF_STOLEN && 193 if (ret != NF_DROP && ret != NF_STOLEN &&
193 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && 194 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -209,7 +210,7 @@ nf_nat_ipv6_out(unsigned int hooknum,
209} 210}
210 211
211static unsigned int 212static unsigned int
212nf_nat_ipv6_local_fn(unsigned int hooknum, 213nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
213 struct sk_buff *skb, 214 struct sk_buff *skb,
214 const struct net_device *in, 215 const struct net_device *in,
215 const struct net_device *out, 216 const struct net_device *out,
@@ -224,7 +225,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,
224 if (skb->len < sizeof(struct ipv6hdr)) 225 if (skb->len < sizeof(struct ipv6hdr))
225 return NF_ACCEPT; 226 return NF_ACCEPT;
226 227
227 ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); 228 ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
228 if (ret != NF_DROP && ret != NF_STOLEN && 229 if (ret != NF_DROP && ret != NF_STOLEN &&
229 (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 230 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
230 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 231 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
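
The IPv6 NAT hooks above (and the nft nat chain added further below) derive the manipulation type from the hook point via HOOK2MANIP(ops->hooknum). As a reading aid only, and as my understanding of the mapping rather than code from the patch, the relationship can be spelled out as:

#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>

/* Illustrative helper: destination NAT is performed where packets enter
 * routing (PRE_ROUTING) or originate locally (LOCAL_OUT); source NAT where
 * they leave (POST_ROUTING) or are delivered locally (LOCAL_IN). The
 * HOOK2MANIP() macro encodes the same mapping. */
static inline enum nf_nat_manip_type manip_for_hook(unsigned int hooknum)
{
	if (hooknum == NF_INET_POST_ROUTING || hooknum == NF_INET_LOCAL_IN)
		return NF_NAT_MANIP_SRC;
	return NF_NAT_MANIP_DST;
}
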
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9a626d86720f..5274740acecc 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,13 +19,14 @@ static const struct xt_table packet_raw = {
19 19
20/* The work comes in here from netfilter.c. */ 20/* The work comes in here from netfilter.c. */
21static unsigned int 21static unsigned int
22ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, 22ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
23 const struct net_device *in, const struct net_device *out, 23 const struct net_device *in, const struct net_device *out,
24 int (*okfn)(struct sk_buff *)) 24 int (*okfn)(struct sk_buff *))
25{ 25{
26 const struct net *net = dev_net((in != NULL) ? in : out); 26 const struct net *net = dev_net((in != NULL) ? in : out);
27 27
28 return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); 28 return ip6t_do_table(skb, ops->hooknum, in, out,
29 net->ipv6.ip6table_raw);
29} 30}
30 31
31static struct nf_hook_ops *rawtable_ops __read_mostly; 32static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index ce88d1d7e525..ab3b0219ecfa 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,14 +36,15 @@ static const struct xt_table security_table = {
36}; 36};
37 37
38static unsigned int 38static unsigned int
39ip6table_security_hook(unsigned int hook, struct sk_buff *skb, 39ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
40 const struct net_device *in, 40 const struct net_device *in,
41 const struct net_device *out, 41 const struct net_device *out,
42 int (*okfn)(struct sk_buff *)) 42 int (*okfn)(struct sk_buff *))
43{ 43{
44 const struct net *net = dev_net((in != NULL) ? in : out); 44 const struct net *net = dev_net((in != NULL) ? in : out);
45 45
46 return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); 46 return ip6t_do_table(skb, ops->hooknum, in, out,
47 net->ipv6.ip6table_security);
47} 48}
48 49
49static struct nf_hook_ops *sectbl_ops __read_mostly; 50static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 54b75ead5a69..486545eb42ce 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
95 return NF_ACCEPT; 95 return NF_ACCEPT;
96} 96}
97 97
98static unsigned int ipv6_helper(unsigned int hooknum, 98static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
99 struct sk_buff *skb, 99 struct sk_buff *skb,
100 const struct net_device *in, 100 const struct net_device *in,
101 const struct net_device *out, 101 const struct net_device *out,
@@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum,
133 return helper->help(skb, protoff, ct, ctinfo); 133 return helper->help(skb, protoff, ct, ctinfo);
134} 134}
135 135
136static unsigned int ipv6_confirm(unsigned int hooknum, 136static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
137 struct sk_buff *skb, 137 struct sk_buff *skb,
138 const struct net_device *in, 138 const struct net_device *in,
139 const struct net_device *out, 139 const struct net_device *out,
@@ -219,16 +219,17 @@ static unsigned int __ipv6_conntrack_in(struct net *net,
219 return nf_conntrack_in(net, PF_INET6, hooknum, skb); 219 return nf_conntrack_in(net, PF_INET6, hooknum, skb);
220} 220}
221 221
222static unsigned int ipv6_conntrack_in(unsigned int hooknum, 222static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
223 struct sk_buff *skb, 223 struct sk_buff *skb,
224 const struct net_device *in, 224 const struct net_device *in,
225 const struct net_device *out, 225 const struct net_device *out,
226 int (*okfn)(struct sk_buff *)) 226 int (*okfn)(struct sk_buff *))
227{ 227{
228 return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); 228 return __ipv6_conntrack_in(dev_net(in), ops->hooknum, skb, in, out,
229 okfn);
229} 230}
230 231
231static unsigned int ipv6_conntrack_local(unsigned int hooknum, 232static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
232 struct sk_buff *skb, 233 struct sk_buff *skb,
233 const struct net_device *in, 234 const struct net_device *in,
234 const struct net_device *out, 235 const struct net_device *out,
@@ -239,7 +240,8 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
239 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); 240 net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
240 return NF_ACCEPT; 241 return NF_ACCEPT;
241 } 242 }
242 return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); 243 return __ipv6_conntrack_in(dev_net(out), ops->hooknum, skb, in, out,
244 okfn);
243} 245}
244 246
245static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 247static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index aacd121fe8c5..ec483aa3f60f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
52 52
53} 53}
54 54
55static unsigned int ipv6_defrag(unsigned int hooknum, 55static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
56 struct sk_buff *skb, 56 struct sk_buff *skb,
57 const struct net_device *in, 57 const struct net_device *in,
58 const struct net_device *out, 58 const struct net_device *out,
@@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
66 return NF_ACCEPT; 66 return NF_ACCEPT;
67#endif 67#endif
68 68
69 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); 69 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
70 /* queued */ 70 /* queued */
71 if (reasm == NULL) 71 if (reasm == NULL)
72 return NF_STOLEN; 72 return NF_STOLEN;
@@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
75 if (reasm == skb) 75 if (reasm == skb)
76 return NF_ACCEPT; 76 return NF_ACCEPT;
77 77
78 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, 78 nf_ct_frag6_output(ops->hooknum, reasm, (struct net_device *)in,
79 (struct net_device *)out, okfn); 79 (struct net_device *)out, okfn);
80 80
81 return NF_STOLEN; 81 return NF_STOLEN;
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
new file mode 100644
index 000000000000..d77db8a13505
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -0,0 +1,127 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
10 */
11
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/ipv6.h>
15#include <linux/netfilter_ipv6.h>
16#include <net/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables_ipv6.h>
18
19static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
20 struct sk_buff *skb,
21 const struct net_device *in,
22 const struct net_device *out,
23 int (*okfn)(struct sk_buff *))
24{
25 struct nft_pktinfo pkt;
26
27 if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
28 if (net_ratelimit())
29 pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
30 "packet\n");
31 return NF_ACCEPT;
32 }
33 if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
34 return NF_DROP;
35
36 return nft_do_chain_pktinfo(&pkt, ops);
37}
38
39static struct nft_af_info nft_af_ipv6 __read_mostly = {
40 .family = NFPROTO_IPV6,
41 .nhooks = NF_INET_NUMHOOKS,
42 .owner = THIS_MODULE,
43 .hooks = {
44 [NF_INET_LOCAL_OUT] = nft_ipv6_output,
45 },
46};
47
48static int nf_tables_ipv6_init_net(struct net *net)
49{
50 net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
51 if (net->nft.ipv6 == NULL)
52 return -ENOMEM;
53
54 memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
55
56 if (nft_register_afinfo(net, net->nft.ipv6) < 0)
57 goto err;
58
59 return 0;
60err:
61 kfree(net->nft.ipv6);
62 return -ENOMEM;
63}
64
65static void nf_tables_ipv6_exit_net(struct net *net)
66{
67 nft_unregister_afinfo(net->nft.ipv6);
68 kfree(net->nft.ipv6);
69}
70
71static struct pernet_operations nf_tables_ipv6_net_ops = {
72 .init = nf_tables_ipv6_init_net,
73 .exit = nf_tables_ipv6_exit_net,
74};
75
76static unsigned int
77nft_do_chain_ipv6(const struct nf_hook_ops *ops,
78 struct sk_buff *skb,
79 const struct net_device *in,
80 const struct net_device *out,
81 int (*okfn)(struct sk_buff *))
82{
83 struct nft_pktinfo pkt;
84
85 /* malformed packet, drop it */
86 if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
87 return NF_DROP;
88
89 return nft_do_chain_pktinfo(&pkt, ops);
90}
91
92static struct nf_chain_type filter_ipv6 = {
93 .family = NFPROTO_IPV6,
94 .name = "filter",
95 .type = NFT_CHAIN_T_DEFAULT,
96 .hook_mask = (1 << NF_INET_LOCAL_IN) |
97 (1 << NF_INET_LOCAL_OUT) |
98 (1 << NF_INET_FORWARD) |
99 (1 << NF_INET_PRE_ROUTING) |
100 (1 << NF_INET_POST_ROUTING),
101 .fn = {
102 [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
103 [NF_INET_LOCAL_OUT] = nft_ipv6_output,
104 [NF_INET_FORWARD] = nft_do_chain_ipv6,
105 [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
106 [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
107 },
108};
109
110static int __init nf_tables_ipv6_init(void)
111{
112 nft_register_chain_type(&filter_ipv6);
113 return register_pernet_subsys(&nf_tables_ipv6_net_ops);
114}
115
116static void __exit nf_tables_ipv6_exit(void)
117{
118 unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
119 nft_unregister_chain_type(&filter_ipv6);
120}
121
122module_init(nf_tables_ipv6_init);
123module_exit(nf_tables_ipv6_exit);
124
125MODULE_LICENSE("GPL");
126MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
127MODULE_ALIAS_NFT_FAMILY(AF_INET6);
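
With the family module above in place (auto-loadable through its MODULE_ALIAS_NFT_FAMILY alias), IPv6 tables and chains can be created over the new NFNL_SUBSYS_NFTABLES netlink subsystem implemented in nf_tables_api.c further below. As a rough illustration of what such a request looks like from userspace, here is a hedged libmnl sketch that sends a single NFT_MSG_NEWTABLE message; the flag choices and the minimal error handling are my own simplifications, not something this patch prescribes:

/* Userspace sketch (not part of the patch): create IPv6 table "filter". */
#include <time.h>
#include <libmnl/libmnl.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>

int main(void)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct mnl_socket *nl;
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfg;

	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type  = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWTABLE;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	nlh->nlmsg_seq   = time(NULL);

	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
	nfg->nfgen_family = NFPROTO_IPV6;
	nfg->version      = NFNETLINK_V0;
	nfg->res_id       = 0;

	mnl_attr_put_strz(nlh, NFTA_TABLE_NAME, "filter");

	nl = mnl_socket_open(NETLINK_NETFILTER);
	if (nl == NULL || mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
		return 1;
	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		return 1;
	mnl_socket_close(nl);
	return 0;
}
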
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
new file mode 100644
index 000000000000..e86dcd70dc76
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -0,0 +1,211 @@
1/*
2 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2012 Intel Corporation
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/list.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter_ipv6.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_conntrack.h>
20#include <net/netfilter/nf_nat.h>
21#include <net/netfilter/nf_nat_core.h>
22#include <net/netfilter/nf_tables.h>
23#include <net/netfilter/nf_tables_ipv6.h>
24#include <net/netfilter/nf_nat_l3proto.h>
25#include <net/ipv6.h>
26
27/*
28 * IPv6 NAT chains
29 */
30
31static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops,
32 struct sk_buff *skb,
33 const struct net_device *in,
34 const struct net_device *out,
35 int (*okfn)(struct sk_buff *))
36{
37 enum ip_conntrack_info ctinfo;
38 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
39 struct nf_conn_nat *nat;
40 enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
41 __be16 frag_off;
42 int hdrlen;
43 u8 nexthdr;
44 struct nft_pktinfo pkt;
45 unsigned int ret;
46
47 if (ct == NULL || nf_ct_is_untracked(ct))
48 return NF_ACCEPT;
49
50 nat = nfct_nat(ct);
51 if (nat == NULL) {
52 /* Conntrack module was loaded late, can't add extension. */
53 if (nf_ct_is_confirmed(ct))
54 return NF_ACCEPT;
55 nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
56 if (nat == NULL)
57 return NF_ACCEPT;
58 }
59
60 switch (ctinfo) {
61 case IP_CT_RELATED:
62 case IP_CT_RELATED + IP_CT_IS_REPLY:
63 nexthdr = ipv6_hdr(skb)->nexthdr;
64 hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
65 &nexthdr, &frag_off);
66
67 if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
68 if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
69 ops->hooknum,
70 hdrlen))
71 return NF_DROP;
72 else
73 return NF_ACCEPT;
74 }
75 /* Fall through */
76 case IP_CT_NEW:
77 if (nf_nat_initialized(ct, maniptype))
78 break;
79
80 nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out);
81
82 ret = nft_do_chain_pktinfo(&pkt, ops);
83 if (ret != NF_ACCEPT)
84 return ret;
85 if (!nf_nat_initialized(ct, maniptype)) {
86 ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
87 if (ret != NF_ACCEPT)
88 return ret;
89 }
90 default:
91 break;
92 }
93
94 return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
95}
96
97static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops,
98 struct sk_buff *skb,
99 const struct net_device *in,
100 const struct net_device *out,
101 int (*okfn)(struct sk_buff *))
102{
103 struct in6_addr daddr = ipv6_hdr(skb)->daddr;
104 unsigned int ret;
105
106 ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
107 if (ret != NF_DROP && ret != NF_STOLEN &&
108 ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
109 skb_dst_drop(skb);
110
111 return ret;
112}
113
114static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops,
115 struct sk_buff *skb,
116 const struct net_device *in,
117 const struct net_device *out,
118 int (*okfn)(struct sk_buff *))
119{
120 enum ip_conntrack_info ctinfo __maybe_unused;
121 const struct nf_conn *ct __maybe_unused;
122 unsigned int ret;
123
124 ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
125#ifdef CONFIG_XFRM
126 if (ret != NF_DROP && ret != NF_STOLEN &&
127 !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
128 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
129 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
130
131 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
132 &ct->tuplehash[!dir].tuple.dst.u3) ||
133 (ct->tuplehash[dir].tuple.src.u.all !=
134 ct->tuplehash[!dir].tuple.dst.u.all))
135 if (nf_xfrm_me_harder(skb, AF_INET6) < 0)
136 ret = NF_DROP;
137 }
138#endif
139 return ret;
140}
141
142static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops,
143 struct sk_buff *skb,
144 const struct net_device *in,
145 const struct net_device *out,
146 int (*okfn)(struct sk_buff *))
147{
148 enum ip_conntrack_info ctinfo;
149 const struct nf_conn *ct;
150 unsigned int ret;
151
152 ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);
153 if (ret != NF_DROP && ret != NF_STOLEN &&
154 (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
155 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
156
157 if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
158 &ct->tuplehash[!dir].tuple.src.u3)) {
159 if (ip6_route_me_harder(skb))
160 ret = NF_DROP;
161 }
162#ifdef CONFIG_XFRM
163 else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
164 ct->tuplehash[dir].tuple.dst.u.all !=
165 ct->tuplehash[!dir].tuple.src.u.all)
166 if (nf_xfrm_me_harder(skb, AF_INET6))
167 ret = NF_DROP;
168#endif
169 }
170 return ret;
171}
172
173static struct nf_chain_type nft_chain_nat_ipv6 = {
174 .family = NFPROTO_IPV6,
175 .name = "nat",
176 .type = NFT_CHAIN_T_NAT,
177 .hook_mask = (1 << NF_INET_PRE_ROUTING) |
178 (1 << NF_INET_POST_ROUTING) |
179 (1 << NF_INET_LOCAL_OUT) |
180 (1 << NF_INET_LOCAL_IN),
181 .fn = {
182 [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting,
183 [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting,
184 [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output,
185 [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn,
186 },
187 .me = THIS_MODULE,
188};
189
190static int __init nft_chain_nat_ipv6_init(void)
191{
192 int err;
193
194 err = nft_register_chain_type(&nft_chain_nat_ipv6);
195 if (err < 0)
196 return err;
197
198 return 0;
199}
200
201static void __exit nft_chain_nat_ipv6_exit(void)
202{
203 nft_unregister_chain_type(&nft_chain_nat_ipv6);
204}
205
206module_init(nft_chain_nat_ipv6_init);
207module_exit(nft_chain_nat_ipv6_exit);
208
209MODULE_LICENSE("GPL");
210MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
211MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
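
Like the filter type earlier, the nat chain type publishes a .hook_mask of the hook points it supports and one .fn handler per hook. As I read the design, the mask is what the chain-creation path consults before attaching a base chain at a given hook, roughly like this hypothetical helper (not code from the patch):

#include <net/netfilter/nf_tables.h>

/* Hypothetical helper: a base chain of a given type may only be attached
 * at hook points whose bit is set in the type's hook_mask. */
static inline bool chain_type_allows_hook(const struct nf_chain_type *ctype,
					  unsigned int hooknum)
{
	return ctype->hook_mask & (1 << hooknum);
}
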
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
new file mode 100644
index 000000000000..3fe40f0456ad
--- /dev/null
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -0,0 +1,88 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 *
9 * Development of this code funded by Astaro AG (http://www.astaro.com/)
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/list.h>
15#include <linux/skbuff.h>
16#include <linux/netlink.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter_ipv6.h>
19#include <linux/netfilter/nfnetlink.h>
20#include <linux/netfilter/nf_tables.h>
21#include <net/netfilter/nf_tables.h>
22#include <net/netfilter/nf_tables_ipv6.h>
23#include <net/route.h>
24
25static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
26 struct sk_buff *skb,
27 const struct net_device *in,
28 const struct net_device *out,
29 int (*okfn)(struct sk_buff *))
30{
31 unsigned int ret;
32 struct nft_pktinfo pkt;
33 struct in6_addr saddr, daddr;
34 u_int8_t hop_limit;
35 u32 mark, flowlabel;
36
37 /* malformed packet, drop it */
38 if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0)
39 return NF_DROP;
40
41 /* save source/dest address, mark, hoplimit, flowlabel, priority */
42 memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
43 memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr));
44 mark = skb->mark;
45 hop_limit = ipv6_hdr(skb)->hop_limit;
46
47 /* flowlabel and prio (includes version, which shouldn't change either) */
48 flowlabel = *((u32 *)ipv6_hdr(skb));
49
50 ret = nft_do_chain_pktinfo(&pkt, ops);
51 if (ret != NF_DROP && ret != NF_QUEUE &&
52 (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
53 memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
54 skb->mark != mark ||
55 ipv6_hdr(skb)->hop_limit != hop_limit ||
56 flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
57 return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
58
59 return ret;
60}
61
62static struct nf_chain_type nft_chain_route_ipv6 = {
63 .family = NFPROTO_IPV6,
64 .name = "route",
65 .type = NFT_CHAIN_T_ROUTE,
66 .hook_mask = (1 << NF_INET_LOCAL_OUT),
67 .fn = {
68 [NF_INET_LOCAL_OUT] = nf_route_table_hook,
69 },
70 .me = THIS_MODULE,
71};
72
73static int __init nft_chain_route_init(void)
74{
75 return nft_register_chain_type(&nft_chain_route_ipv6);
76}
77
78static void __exit nft_chain_route_exit(void)
79{
80 nft_unregister_chain_type(&nft_chain_route_ipv6);
81}
82
83module_init(nft_chain_route_init);
84module_exit(nft_chain_route_exit);
85
86MODULE_LICENSE("GPL");
87MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
88MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route");
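
The route chain hook above snapshots every field that can influence the IPv6 routing decision before running the ruleset, and calls ip6_route_me_harder() only if one of them was modified by a rule. Written out as a data structure, with hypothetical names and assuming the same set of fields used by the hook:

#include <linux/in6.h>
#include <linux/string.h>
#include <linux/types.h>

/* Illustrative only: the fields whose modification forces a re-route. */
struct rt6_snapshot {
	struct in6_addr	saddr, daddr;
	u32		mark;
	u8		hop_limit;
	u32		flowinfo;	/* first 32 bits of the IPv6 header */
};

static bool rt6_snapshot_changed(const struct rt6_snapshot *a,
				 const struct rt6_snapshot *b)
{
	return memcmp(&a->saddr, &b->saddr, sizeof(a->saddr)) ||
	       memcmp(&a->daddr, &b->daddr, sizeof(a->daddr)) ||
	       a->mark != b->mark ||
	       a->hop_limit != b->hop_limit ||
	       a->flowinfo != b->flowinfo;
}
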
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6dff2b..48acec17e27a 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -413,6 +413,58 @@ config NETFILTER_SYNPROXY
413 413
414endif # NF_CONNTRACK 414endif # NF_CONNTRACK
415 415
416config NF_TABLES
417 depends on NETFILTER_NETLINK
418 tristate "Netfilter nf_tables support"
419
420config NFT_EXTHDR
421 depends on NF_TABLES
422 tristate "Netfilter nf_tables IPv6 exthdr module"
423
424config NFT_META
425 depends on NF_TABLES
426 tristate "Netfilter nf_tables meta module"
427
428config NFT_CT
429 depends on NF_TABLES
430 depends on NF_CONNTRACK
431 tristate "Netfilter nf_tables conntrack module"
432
433config NFT_RBTREE
434 depends on NF_TABLES
435 tristate "Netfilter nf_tables rbtree set module"
436
437config NFT_HASH
438 depends on NF_TABLES
439 tristate "Netfilter nf_tables hash set module"
440
441config NFT_COUNTER
442 depends on NF_TABLES
443 tristate "Netfilter nf_tables counter module"
444
445config NFT_LOG
446 depends on NF_TABLES
447 tristate "Netfilter nf_tables log module"
448
449config NFT_LIMIT
450 depends on NF_TABLES
451 tristate "Netfilter nf_tables limit module"
452
453config NFT_NAT
454 depends on NF_TABLES
455 depends on NF_CONNTRACK
456 depends on NF_NAT
457 tristate "Netfilter nf_tables nat module"
458
459config NFT_COMPAT
460 depends on NF_TABLES
461 depends on NETFILTER_XTABLES
462 tristate "Netfilter x_tables over nf_tables module"
463 help
464 This is required if you intend to use any of the existing
465 x_tables match/target extensions over the nf_tables
466 framework.
467
416config NETFILTER_XTABLES 468config NETFILTER_XTABLES
417 tristate "Netfilter Xtables support (required for ip_tables)" 469 tristate "Netfilter Xtables support (required for ip_tables)"
418 default m if NETFILTER_ADVANCED=n 470 default m if NETFILTER_ADVANCED=n
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c3a0a12907f6..394483b2c193 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -64,6 +64,24 @@ obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
64# SYNPROXY 64# SYNPROXY
65obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o 65obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
66 66
67# nf_tables
68nf_tables-objs += nf_tables_core.o nf_tables_api.o
69nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
70nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
71
72obj-$(CONFIG_NF_TABLES) += nf_tables.o
73obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
74obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o
75obj-$(CONFIG_NFT_META) += nft_meta.o
76obj-$(CONFIG_NFT_CT) += nft_ct.o
77obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
78obj-$(CONFIG_NFT_NAT) += nft_nat.o
79#nf_tables-objs += nft_meta_target.o
80obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
81obj-$(CONFIG_NFT_HASH) += nft_hash.o
82obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
83obj-$(CONFIG_NFT_LOG) += nft_log.o
84
67# generic X tables 85# generic X tables
68obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o 86obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
69 87
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 593b16ea45e0..1fbab0cdd302 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -146,7 +146,7 @@ unsigned int nf_iterate(struct list_head *head,
146 /* Optimization: we don't need to hold module 146 /* Optimization: we don't need to hold module
147 reference here, since function can't sleep. --RR */ 147 reference here, since function can't sleep. --RR */
148repeat: 148repeat:
149 verdict = (*elemp)->hook(hook, skb, indev, outdev, okfn); 149 verdict = (*elemp)->hook(*elemp, skb, indev, outdev, okfn);
150 if (verdict != NF_ACCEPT) { 150 if (verdict != NF_ACCEPT) {
151#ifdef CONFIG_NETFILTER_DEBUG 151#ifdef CONFIG_NETFILTER_DEBUG
152 if (unlikely((verdict & NF_VERDICT_MASK) 152 if (unlikely((verdict & NF_VERDICT_MASK)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 74fd00c27210..34fda62f40f6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1239,11 +1239,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
1239 * Check if packet is reply for established ip_vs_conn. 1239 * Check if packet is reply for established ip_vs_conn.
1240 */ 1240 */
1241static unsigned int 1241static unsigned int
1242ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, 1242ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
1243 const struct net_device *in, const struct net_device *out, 1243 const struct net_device *in, const struct net_device *out,
1244 int (*okfn)(struct sk_buff *)) 1244 int (*okfn)(struct sk_buff *))
1245{ 1245{
1246 return ip_vs_out(hooknum, skb, AF_INET); 1246 return ip_vs_out(ops->hooknum, skb, AF_INET);
1247} 1247}
1248 1248
1249/* 1249/*
@@ -1251,11 +1251,11 @@ ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
1251 * Check if packet is reply for established ip_vs_conn. 1251 * Check if packet is reply for established ip_vs_conn.
1252 */ 1252 */
1253static unsigned int 1253static unsigned int
1254ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, 1254ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
1255 const struct net_device *in, const struct net_device *out, 1255 const struct net_device *in, const struct net_device *out,
1256 int (*okfn)(struct sk_buff *)) 1256 int (*okfn)(struct sk_buff *))
1257{ 1257{
1258 return ip_vs_out(hooknum, skb, AF_INET); 1258 return ip_vs_out(ops->hooknum, skb, AF_INET);
1259} 1259}
1260 1260
1261#ifdef CONFIG_IP_VS_IPV6 1261#ifdef CONFIG_IP_VS_IPV6
@@ -1266,11 +1266,11 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
1266 * Check if packet is reply for established ip_vs_conn. 1266 * Check if packet is reply for established ip_vs_conn.
1267 */ 1267 */
1268static unsigned int 1268static unsigned int
1269ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, 1269ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
1270 const struct net_device *in, const struct net_device *out, 1270 const struct net_device *in, const struct net_device *out,
1271 int (*okfn)(struct sk_buff *)) 1271 int (*okfn)(struct sk_buff *))
1272{ 1272{
1273 return ip_vs_out(hooknum, skb, AF_INET6); 1273 return ip_vs_out(ops->hooknum, skb, AF_INET6);
1274} 1274}
1275 1275
1276/* 1276/*
@@ -1278,11 +1278,11 @@ ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
1278 * Check if packet is reply for established ip_vs_conn. 1278 * Check if packet is reply for established ip_vs_conn.
1279 */ 1279 */
1280static unsigned int 1280static unsigned int
1281ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, 1281ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
1282 const struct net_device *in, const struct net_device *out, 1282 const struct net_device *in, const struct net_device *out,
1283 int (*okfn)(struct sk_buff *)) 1283 int (*okfn)(struct sk_buff *))
1284{ 1284{
1285 return ip_vs_out(hooknum, skb, AF_INET6); 1285 return ip_vs_out(ops->hooknum, skb, AF_INET6);
1286} 1286}
1287 1287
1288#endif 1288#endif
@@ -1733,12 +1733,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1733 * Schedule and forward packets from remote clients 1733 * Schedule and forward packets from remote clients
1734 */ 1734 */
1735static unsigned int 1735static unsigned int
1736ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb, 1736ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
1737 const struct net_device *in, 1737 const struct net_device *in,
1738 const struct net_device *out, 1738 const struct net_device *out,
1739 int (*okfn)(struct sk_buff *)) 1739 int (*okfn)(struct sk_buff *))
1740{ 1740{
1741 return ip_vs_in(hooknum, skb, AF_INET); 1741 return ip_vs_in(ops->hooknum, skb, AF_INET);
1742} 1742}
1743 1743
1744/* 1744/*
@@ -1746,11 +1746,11 @@ ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
1746 * Schedule and forward packets from local clients 1746 * Schedule and forward packets from local clients
1747 */ 1747 */
1748static unsigned int 1748static unsigned int
1749ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, 1749ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
1750 const struct net_device *in, const struct net_device *out, 1750 const struct net_device *in, const struct net_device *out,
1751 int (*okfn)(struct sk_buff *)) 1751 int (*okfn)(struct sk_buff *))
1752{ 1752{
1753 return ip_vs_in(hooknum, skb, AF_INET); 1753 return ip_vs_in(ops->hooknum, skb, AF_INET);
1754} 1754}
1755 1755
1756#ifdef CONFIG_IP_VS_IPV6 1756#ifdef CONFIG_IP_VS_IPV6
@@ -1760,7 +1760,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1760 * Copy info from first fragment, to the rest of them. 1760 * Copy info from first fragment, to the rest of them.
1761 */ 1761 */
1762static unsigned int 1762static unsigned int
1763ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, 1763ip_vs_preroute_frag6(const struct nf_hook_ops *ops, struct sk_buff *skb,
1764 const struct net_device *in, 1764 const struct net_device *in,
1765 const struct net_device *out, 1765 const struct net_device *out,
1766 int (*okfn)(struct sk_buff *)) 1766 int (*okfn)(struct sk_buff *))
@@ -1792,12 +1792,12 @@ ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
1792 * Schedule and forward packets from remote clients 1792 * Schedule and forward packets from remote clients
1793 */ 1793 */
1794static unsigned int 1794static unsigned int
1795ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb, 1795ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
1796 const struct net_device *in, 1796 const struct net_device *in,
1797 const struct net_device *out, 1797 const struct net_device *out,
1798 int (*okfn)(struct sk_buff *)) 1798 int (*okfn)(struct sk_buff *))
1799{ 1799{
1800 return ip_vs_in(hooknum, skb, AF_INET6); 1800 return ip_vs_in(ops->hooknum, skb, AF_INET6);
1801} 1801}
1802 1802
1803/* 1803/*
@@ -1805,11 +1805,11 @@ ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
1805 * Schedule and forward packets from local clients 1805 * Schedule and forward packets from local clients
1806 */ 1806 */
1807static unsigned int 1807static unsigned int
1808ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb, 1808ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
1809 const struct net_device *in, const struct net_device *out, 1809 const struct net_device *in, const struct net_device *out,
1810 int (*okfn)(struct sk_buff *)) 1810 int (*okfn)(struct sk_buff *))
1811{ 1811{
1812 return ip_vs_in(hooknum, skb, AF_INET6); 1812 return ip_vs_in(ops->hooknum, skb, AF_INET6);
1813} 1813}
1814 1814
1815#endif 1815#endif
@@ -1825,7 +1825,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
1825 * and send them to ip_vs_in_icmp. 1825 * and send them to ip_vs_in_icmp.
1826 */ 1826 */
1827static unsigned int 1827static unsigned int
1828ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, 1828ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
1829 const struct net_device *in, const struct net_device *out, 1829 const struct net_device *in, const struct net_device *out,
1830 int (*okfn)(struct sk_buff *)) 1830 int (*okfn)(struct sk_buff *))
1831{ 1831{
@@ -1842,12 +1842,12 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
1842 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) 1842 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
1843 return NF_ACCEPT; 1843 return NF_ACCEPT;
1844 1844
1845 return ip_vs_in_icmp(skb, &r, hooknum); 1845 return ip_vs_in_icmp(skb, &r, ops->hooknum);
1846} 1846}
1847 1847
1848#ifdef CONFIG_IP_VS_IPV6 1848#ifdef CONFIG_IP_VS_IPV6
1849static unsigned int 1849static unsigned int
1850ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, 1850ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
1851 const struct net_device *in, const struct net_device *out, 1851 const struct net_device *in, const struct net_device *out,
1852 int (*okfn)(struct sk_buff *)) 1852 int (*okfn)(struct sk_buff *))
1853{ 1853{
@@ -1866,7 +1866,7 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1866 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable)) 1866 if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
1867 return NF_ACCEPT; 1867 return NF_ACCEPT;
1868 1868
1869 return ip_vs_in_icmp_v6(skb, &r, hooknum, &iphdr); 1869 return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
1870} 1870}
1871#endif 1871#endif
1872 1872
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6f0f4f7f68a5..63a815402211 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -432,6 +432,26 @@ nf_nat_setup_info(struct nf_conn *ct,
432} 432}
433EXPORT_SYMBOL(nf_nat_setup_info); 433EXPORT_SYMBOL(nf_nat_setup_info);
434 434
435unsigned int
436nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
437{
438 /* Force range to this IP; let proto decide mapping for
439 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
440 * Use reply in case it's already been mangled (eg local packet).
441 */
442 union nf_inet_addr ip =
443 (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
444 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
445 ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
446 struct nf_nat_range range = {
447 .flags = NF_NAT_RANGE_MAP_IPS,
448 .min_addr = ip,
449 .max_addr = ip,
450 };
451 return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
452}
453EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
454
435/* Do packet manipulations according to nf_nat_setup_info. */ 455/* Do packet manipulations according to nf_nat_setup_info. */
436unsigned int nf_nat_packet(struct nf_conn *ct, 456unsigned int nf_nat_packet(struct nf_conn *ct,
437 enum ip_conntrack_info ctinfo, 457 enum ip_conntrack_info ctinfo,
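
nf_nat_alloc_null_binding() is exported here so that the nft nat chain above can fall back to a null binding when the ruleset ran but no rule selected an address: as its comment says, the range is forced to the connection's current address and the protocol picks the per-proto parts, so the connection is still registered consistently with the NAT core. A condensed sketch of the calling pattern (illustrative names and headers; the real caller is nf_nat_ipv6_fn() in nft_chain_nat_ipv6.c above):

#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>	/* assumed to declare the helper */

/* Sketch: handling of a NEW connection after the nat chain has run. */
static unsigned int example_nat_new_conn(struct nf_conn *ct,
					 const struct nf_hook_ops *ops,
					 unsigned int chain_verdict)
{
	if (chain_verdict != NF_ACCEPT)
		return chain_verdict;

	/* No rule attached a binding: fall back to the null binding. */
	if (!nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
		return nf_nat_alloc_null_binding(ct, ops->hooknum);

	return NF_ACCEPT;
}
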
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
new file mode 100644
index 000000000000..dcddc49c0e08
--- /dev/null
+++ b/net/netfilter/nf_tables_api.c
@@ -0,0 +1,3275 @@
1/*
2 * Copyright (c) 2007-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/list.h>
14#include <linux/skbuff.h>
15#include <linux/netlink.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter/nfnetlink.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables_core.h>
20#include <net/netfilter/nf_tables.h>
21#include <net/net_namespace.h>
22#include <net/sock.h>
23
24static LIST_HEAD(nf_tables_expressions);
25
26/**
27 * nft_register_afinfo - register nf_tables address family info
28 *
29 * @afi: address family info to register
30 *
31 * Register the address family for use with nf_tables. Returns zero on
32 * success or a negative errno code otherwise.
33 */
34int nft_register_afinfo(struct net *net, struct nft_af_info *afi)
35{
36 INIT_LIST_HEAD(&afi->tables);
37 nfnl_lock(NFNL_SUBSYS_NFTABLES);
38 list_add_tail(&afi->list, &net->nft.af_info);
39 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
40 return 0;
41}
42EXPORT_SYMBOL_GPL(nft_register_afinfo);
43
44/**
45 * nft_unregister_afinfo - unregister nf_tables address family info
46 *
47 * @afi: address family info to unregister
48 *
49 * Unregister the address family for use with nf_tables.
50 */
51void nft_unregister_afinfo(struct nft_af_info *afi)
52{
53 nfnl_lock(NFNL_SUBSYS_NFTABLES);
54 list_del(&afi->list);
55 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
56}
57EXPORT_SYMBOL_GPL(nft_unregister_afinfo);
58
59static struct nft_af_info *nft_afinfo_lookup(struct net *net, int family)
60{
61 struct nft_af_info *afi;
62
63 list_for_each_entry(afi, &net->nft.af_info, list) {
64 if (afi->family == family)
65 return afi;
66 }
67 return NULL;
68}
69
70static struct nft_af_info *
71nf_tables_afinfo_lookup(struct net *net, int family, bool autoload)
72{
73 struct nft_af_info *afi;
74
75 afi = nft_afinfo_lookup(net, family);
76 if (afi != NULL)
77 return afi;
78#ifdef CONFIG_MODULES
79 if (autoload) {
80 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
81 request_module("nft-afinfo-%u", family);
82 nfnl_lock(NFNL_SUBSYS_NFTABLES);
83 afi = nft_afinfo_lookup(net, family);
84 if (afi != NULL)
85 return ERR_PTR(-EAGAIN);
86 }
87#endif
88 return ERR_PTR(-EAFNOSUPPORT);
89}
90
91/*
92 * Tables
93 */
94
95static struct nft_table *nft_table_lookup(const struct nft_af_info *afi,
96 const struct nlattr *nla)
97{
98 struct nft_table *table;
99
100 list_for_each_entry(table, &afi->tables, list) {
101 if (!nla_strcmp(nla, table->name))
102 return table;
103 }
104 return NULL;
105}
106
107static struct nft_table *nf_tables_table_lookup(const struct nft_af_info *afi,
108 const struct nlattr *nla)
109{
110 struct nft_table *table;
111
112 if (nla == NULL)
113 return ERR_PTR(-EINVAL);
114
115 table = nft_table_lookup(afi, nla);
116 if (table != NULL)
117 return table;
118
119 return ERR_PTR(-ENOENT);
120}
121
122static inline u64 nf_tables_alloc_handle(struct nft_table *table)
123{
124 return ++table->hgenerator;
125}
126
127static struct nf_chain_type *chain_type[AF_MAX][NFT_CHAIN_T_MAX];
128
129static int __nf_tables_chain_type_lookup(int family, const struct nlattr *nla)
130{
131 int i;
132
133 for (i=0; i<NFT_CHAIN_T_MAX; i++) {
134 if (chain_type[family][i] != NULL &&
135 !nla_strcmp(nla, chain_type[family][i]->name))
136 return i;
137 }
138 return -1;
139}
140
141static int nf_tables_chain_type_lookup(const struct nft_af_info *afi,
142 const struct nlattr *nla,
143 bool autoload)
144{
145 int type;
146
147 type = __nf_tables_chain_type_lookup(afi->family, nla);
148#ifdef CONFIG_MODULES
149 if (type < 0 && autoload) {
150 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
151 request_module("nft-chain-%u-%.*s", afi->family,
152 nla_len(nla)-1, (const char *)nla_data(nla));
153 nfnl_lock(NFNL_SUBSYS_NFTABLES);
154 type = __nf_tables_chain_type_lookup(afi->family, nla);
155 }
156#endif
157 return type;
158}
159
160static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
161 [NFTA_TABLE_NAME] = { .type = NLA_STRING },
162 [NFTA_TABLE_FLAGS] = { .type = NLA_U32 },
163};
164
165static int nf_tables_fill_table_info(struct sk_buff *skb, u32 portid, u32 seq,
166 int event, u32 flags, int family,
167 const struct nft_table *table)
168{
169 struct nlmsghdr *nlh;
170 struct nfgenmsg *nfmsg;
171
172 event |= NFNL_SUBSYS_NFTABLES << 8;
173 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
174 if (nlh == NULL)
175 goto nla_put_failure;
176
177 nfmsg = nlmsg_data(nlh);
178 nfmsg->nfgen_family = family;
179 nfmsg->version = NFNETLINK_V0;
180 nfmsg->res_id = 0;
181
182 if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
183 nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)))
184 goto nla_put_failure;
185
186 return nlmsg_end(skb, nlh);
187
188nla_put_failure:
189 nlmsg_trim(skb, nlh);
190 return -1;
191}
192
193static int nf_tables_table_notify(const struct sk_buff *oskb,
194 const struct nlmsghdr *nlh,
195 const struct nft_table *table,
196 int event, int family)
197{
198 struct sk_buff *skb;
199 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
200 u32 seq = nlh ? nlh->nlmsg_seq : 0;
201 struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
202 bool report;
203 int err;
204
205 report = nlh ? nlmsg_report(nlh) : false;
206 if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
207 return 0;
208
209 err = -ENOBUFS;
210 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
211 if (skb == NULL)
212 goto err;
213
214 err = nf_tables_fill_table_info(skb, portid, seq, event, 0,
215 family, table);
216 if (err < 0) {
217 kfree_skb(skb);
218 goto err;
219 }
220
221 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
222 GFP_KERNEL);
223err:
224 if (err < 0)
225 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
226 return err;
227}
228
229static int nf_tables_dump_tables(struct sk_buff *skb,
230 struct netlink_callback *cb)
231{
232 const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
233 const struct nft_af_info *afi;
234 const struct nft_table *table;
235 unsigned int idx = 0, s_idx = cb->args[0];
236 struct net *net = sock_net(skb->sk);
237 int family = nfmsg->nfgen_family;
238
239 list_for_each_entry(afi, &net->nft.af_info, list) {
240 if (family != NFPROTO_UNSPEC && family != afi->family)
241 continue;
242
243 list_for_each_entry(table, &afi->tables, list) {
244 if (idx < s_idx)
245 goto cont;
246 if (idx > s_idx)
247 memset(&cb->args[1], 0,
248 sizeof(cb->args) - sizeof(cb->args[0]));
249 if (nf_tables_fill_table_info(skb,
250 NETLINK_CB(cb->skb).portid,
251 cb->nlh->nlmsg_seq,
252 NFT_MSG_NEWTABLE,
253 NLM_F_MULTI,
254 afi->family, table) < 0)
255 goto done;
256cont:
257 idx++;
258 }
259 }
260done:
261 cb->args[0] = idx;
262 return skb->len;
263}
264
265static int nf_tables_gettable(struct sock *nlsk, struct sk_buff *skb,
266 const struct nlmsghdr *nlh,
267 const struct nlattr * const nla[])
268{
269 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
270 const struct nft_af_info *afi;
271 const struct nft_table *table;
272 struct sk_buff *skb2;
273 struct net *net = sock_net(skb->sk);
274 int family = nfmsg->nfgen_family;
275 int err;
276
277 if (nlh->nlmsg_flags & NLM_F_DUMP) {
278 struct netlink_dump_control c = {
279 .dump = nf_tables_dump_tables,
280 };
281 return netlink_dump_start(nlsk, skb, nlh, &c);
282 }
283
284 afi = nf_tables_afinfo_lookup(net, family, false);
285 if (IS_ERR(afi))
286 return PTR_ERR(afi);
287
288 table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
289 if (IS_ERR(table))
290 return PTR_ERR(table);
291
292 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
293 if (!skb2)
294 return -ENOMEM;
295
296 err = nf_tables_fill_table_info(skb2, NETLINK_CB(skb).portid,
297 nlh->nlmsg_seq, NFT_MSG_NEWTABLE, 0,
298 family, table);
299 if (err < 0)
300 goto err;
301
302 return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
303
304err:
305 kfree_skb(skb2);
306 return err;
307}
308
309static int nf_tables_table_enable(struct nft_table *table)
310{
311 struct nft_chain *chain;
312 int err, i = 0;
313
314 list_for_each_entry(chain, &table->chains, list) {
315 err = nf_register_hook(&nft_base_chain(chain)->ops);
316 if (err < 0)
317 goto err;
318
319 i++;
320 }
321 return 0;
322err:
323 list_for_each_entry(chain, &table->chains, list) {
324 if (i-- <= 0)
325 break;
326
327 nf_unregister_hook(&nft_base_chain(chain)->ops);
328 }
329 return err;
330}
331
332static int nf_tables_table_disable(struct nft_table *table)
333{
334 struct nft_chain *chain;
335
336 list_for_each_entry(chain, &table->chains, list)
337 nf_unregister_hook(&nft_base_chain(chain)->ops);
338
339 return 0;
340}
341
342static int nf_tables_updtable(struct sock *nlsk, struct sk_buff *skb,
343 const struct nlmsghdr *nlh,
344 const struct nlattr * const nla[],
345 struct nft_af_info *afi, struct nft_table *table)
346{
347 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
348 int family = nfmsg->nfgen_family, ret = 0;
349
350 if (nla[NFTA_TABLE_FLAGS]) {
351 __be32 flags;
352
353 flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
354 if (flags & ~NFT_TABLE_F_DORMANT)
355 return -EINVAL;
356
357 if ((flags & NFT_TABLE_F_DORMANT) &&
358 !(table->flags & NFT_TABLE_F_DORMANT)) {
359 ret = nf_tables_table_disable(table);
360 if (ret >= 0)
361 table->flags |= NFT_TABLE_F_DORMANT;
362 } else if (!(flags & NFT_TABLE_F_DORMANT) &&
363 table->flags & NFT_TABLE_F_DORMANT) {
364 ret = nf_tables_table_enable(table);
365 if (ret >= 0)
366 table->flags &= ~NFT_TABLE_F_DORMANT;
367 }
368 if (ret < 0)
369 goto err;
370 }
371
372 nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
373err:
374 return ret;
375}
376
377static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
378 const struct nlmsghdr *nlh,
379 const struct nlattr * const nla[])
380{
381 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
382 const struct nlattr *name;
383 struct nft_af_info *afi;
384 struct nft_table *table;
385 struct net *net = sock_net(skb->sk);
386 int family = nfmsg->nfgen_family;
387
388 afi = nf_tables_afinfo_lookup(net, family, true);
389 if (IS_ERR(afi))
390 return PTR_ERR(afi);
391
392 name = nla[NFTA_TABLE_NAME];
393 table = nf_tables_table_lookup(afi, name);
394 if (IS_ERR(table)) {
395 if (PTR_ERR(table) != -ENOENT)
396 return PTR_ERR(table);
397 table = NULL;
398 }
399
400 if (table != NULL) {
401 if (nlh->nlmsg_flags & NLM_F_EXCL)
402 return -EEXIST;
403 if (nlh->nlmsg_flags & NLM_F_REPLACE)
404 return -EOPNOTSUPP;
405 return nf_tables_updtable(nlsk, skb, nlh, nla, afi, table);
406 }
407
408 table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
409 if (table == NULL)
410 return -ENOMEM;
411
412 nla_strlcpy(table->name, name, nla_len(name));
413 INIT_LIST_HEAD(&table->chains);
414 INIT_LIST_HEAD(&table->sets);
415
416 if (nla[NFTA_TABLE_FLAGS]) {
417 __be32 flags;
418
419 flags = ntohl(nla_get_be32(nla[NFTA_TABLE_FLAGS]));
420 if (flags & ~NFT_TABLE_F_DORMANT) {
421 kfree(table);
422 return -EINVAL;
423 }
424
425 table->flags |= flags;
426 }
427
428 list_add_tail(&table->list, &afi->tables);
429 nf_tables_table_notify(skb, nlh, table, NFT_MSG_NEWTABLE, family);
430 return 0;
431}
432
433static int nf_tables_deltable(struct sock *nlsk, struct sk_buff *skb,
434 const struct nlmsghdr *nlh,
435 const struct nlattr * const nla[])
436{
437 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
438 struct nft_af_info *afi;
439 struct nft_table *table;
440 struct net *net = sock_net(skb->sk);
441 int family = nfmsg->nfgen_family;
442
443 afi = nf_tables_afinfo_lookup(net, family, false);
444 if (IS_ERR(afi))
445 return PTR_ERR(afi);
446
447 table = nf_tables_table_lookup(afi, nla[NFTA_TABLE_NAME]);
448 if (IS_ERR(table))
449 return PTR_ERR(table);
450
451 if (table->use)
452 return -EBUSY;
453
454 list_del(&table->list);
455 nf_tables_table_notify(skb, nlh, table, NFT_MSG_DELTABLE, family);
456 kfree(table);
457 return 0;
458}
459
460int nft_register_chain_type(struct nf_chain_type *ctype)
461{
462 int err = 0;
463
464 nfnl_lock(NFNL_SUBSYS_NFTABLES);
465 if (chain_type[ctype->family][ctype->type] != NULL) {
466 err = -EBUSY;
467 goto out;
468 }
469
470 if (!try_module_get(ctype->me))
471 goto out;
472
473 chain_type[ctype->family][ctype->type] = ctype;
474out:
475 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
476 return err;
477}
478EXPORT_SYMBOL_GPL(nft_register_chain_type);
479
480void nft_unregister_chain_type(struct nf_chain_type *ctype)
481{
482 nfnl_lock(NFNL_SUBSYS_NFTABLES);
483 chain_type[ctype->family][ctype->type] = NULL;
484 module_put(ctype->me);
485 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
486}
487EXPORT_SYMBOL_GPL(nft_unregister_chain_type);
488
489/*
490 * Chains
491 */
492
493static struct nft_chain *
494nf_tables_chain_lookup_byhandle(const struct nft_table *table, u64 handle)
495{
496 struct nft_chain *chain;
497
498 list_for_each_entry(chain, &table->chains, list) {
499 if (chain->handle == handle)
500 return chain;
501 }
502
503 return ERR_PTR(-ENOENT);
504}
505
506static struct nft_chain *nf_tables_chain_lookup(const struct nft_table *table,
507 const struct nlattr *nla)
508{
509 struct nft_chain *chain;
510
511 if (nla == NULL)
512 return ERR_PTR(-EINVAL);
513
514 list_for_each_entry(chain, &table->chains, list) {
515 if (!nla_strcmp(nla, chain->name))
516 return chain;
517 }
518
519 return ERR_PTR(-ENOENT);
520}
521
522static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
523 [NFTA_CHAIN_TABLE] = { .type = NLA_STRING },
524 [NFTA_CHAIN_HANDLE] = { .type = NLA_U64 },
525 [NFTA_CHAIN_NAME] = { .type = NLA_STRING,
526 .len = NFT_CHAIN_MAXNAMELEN - 1 },
527 [NFTA_CHAIN_HOOK] = { .type = NLA_NESTED },
528 [NFTA_CHAIN_POLICY] = { .type = NLA_U32 },
529 [NFTA_CHAIN_TYPE] = { .type = NLA_NUL_STRING },
530 [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED },
531};
532
533static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
534 [NFTA_HOOK_HOOKNUM] = { .type = NLA_U32 },
535 [NFTA_HOOK_PRIORITY] = { .type = NLA_U32 },
536};
537
538static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
539{
540 struct nft_stats *cpu_stats, total;
541 struct nlattr *nest;
542 int cpu;
543
544 memset(&total, 0, sizeof(total));
545 for_each_possible_cpu(cpu) {
546 cpu_stats = per_cpu_ptr(stats, cpu);
547 total.pkts += cpu_stats->pkts;
548 total.bytes += cpu_stats->bytes;
549 }
550 nest = nla_nest_start(skb, NFTA_CHAIN_COUNTERS);
551 if (nest == NULL)
552 goto nla_put_failure;
553
554 if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.pkts)) ||
555 nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)))
556 goto nla_put_failure;
557
558 nla_nest_end(skb, nest);
559 return 0;
560
561nla_put_failure:
562 return -ENOSPC;
563}
564
565static int nf_tables_fill_chain_info(struct sk_buff *skb, u32 portid, u32 seq,
566 int event, u32 flags, int family,
567 const struct nft_table *table,
568 const struct nft_chain *chain)
569{
570 struct nlmsghdr *nlh;
571 struct nfgenmsg *nfmsg;
572
573 event |= NFNL_SUBSYS_NFTABLES << 8;
574 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
575 if (nlh == NULL)
576 goto nla_put_failure;
577
578 nfmsg = nlmsg_data(nlh);
579 nfmsg->nfgen_family = family;
580 nfmsg->version = NFNETLINK_V0;
581 nfmsg->res_id = 0;
582
583 if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
584 goto nla_put_failure;
585 if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle)))
586 goto nla_put_failure;
587 if (nla_put_string(skb, NFTA_CHAIN_NAME, chain->name))
588 goto nla_put_failure;
589
590 if (chain->flags & NFT_BASE_CHAIN) {
591 const struct nft_base_chain *basechain = nft_base_chain(chain);
592 const struct nf_hook_ops *ops = &basechain->ops;
593 struct nlattr *nest;
594
595 nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
596 if (nest == NULL)
597 goto nla_put_failure;
598 if (nla_put_be32(skb, NFTA_HOOK_HOOKNUM, htonl(ops->hooknum)))
599 goto nla_put_failure;
600 if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
601 goto nla_put_failure;
602 nla_nest_end(skb, nest);
603
604 if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
605 htonl(basechain->policy)))
606 goto nla_put_failure;
607
608 if (nla_put_string(skb, NFTA_CHAIN_TYPE,
609 chain_type[ops->pf][nft_base_chain(chain)->type]->name))
610 goto nla_put_failure;
611
612 if (nft_dump_stats(skb, nft_base_chain(chain)->stats))
613 goto nla_put_failure;
614 }
615
616 if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use)))
617 goto nla_put_failure;
618
619 return nlmsg_end(skb, nlh);
620
621nla_put_failure:
622 nlmsg_trim(skb, nlh);
623 return -1;
624}
625
626static int nf_tables_chain_notify(const struct sk_buff *oskb,
627 const struct nlmsghdr *nlh,
628 const struct nft_table *table,
629 const struct nft_chain *chain,
630 int event, int family)
631{
632 struct sk_buff *skb;
633 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
634 struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
635 u32 seq = nlh ? nlh->nlmsg_seq : 0;
636 bool report;
637 int err;
638
639 report = nlh ? nlmsg_report(nlh) : false;
640 if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
641 return 0;
642
643 err = -ENOBUFS;
644 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
645 if (skb == NULL)
646 goto err;
647
648 err = nf_tables_fill_chain_info(skb, portid, seq, event, 0, family,
649 table, chain);
650 if (err < 0) {
651 kfree_skb(skb);
652 goto err;
653 }
654
655 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
656 GFP_KERNEL);
657err:
658 if (err < 0)
659 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
660 return err;
661}
662
663static int nf_tables_dump_chains(struct sk_buff *skb,
664 struct netlink_callback *cb)
665{
666 const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
667 const struct nft_af_info *afi;
668 const struct nft_table *table;
669 const struct nft_chain *chain;
670 unsigned int idx = 0, s_idx = cb->args[0];
671 struct net *net = sock_net(skb->sk);
672 int family = nfmsg->nfgen_family;
673
674 list_for_each_entry(afi, &net->nft.af_info, list) {
675 if (family != NFPROTO_UNSPEC && family != afi->family)
676 continue;
677
678 list_for_each_entry(table, &afi->tables, list) {
679 list_for_each_entry(chain, &table->chains, list) {
680 if (idx < s_idx)
681 goto cont;
682 if (idx > s_idx)
683 memset(&cb->args[1], 0,
684 sizeof(cb->args) - sizeof(cb->args[0]));
685 if (nf_tables_fill_chain_info(skb, NETLINK_CB(cb->skb).portid,
686 cb->nlh->nlmsg_seq,
687 NFT_MSG_NEWCHAIN,
688 NLM_F_MULTI,
689 afi->family, table, chain) < 0)
690 goto done;
691cont:
692 idx++;
693 }
694 }
695 }
696done:
697 cb->args[0] = idx;
698 return skb->len;
699}
700
701
702static int nf_tables_getchain(struct sock *nlsk, struct sk_buff *skb,
703 const struct nlmsghdr *nlh,
704 const struct nlattr * const nla[])
705{
706 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
707 const struct nft_af_info *afi;
708 const struct nft_table *table;
709 const struct nft_chain *chain;
710 struct sk_buff *skb2;
711 struct net *net = sock_net(skb->sk);
712 int family = nfmsg->nfgen_family;
713 int err;
714
715 if (nlh->nlmsg_flags & NLM_F_DUMP) {
716 struct netlink_dump_control c = {
717 .dump = nf_tables_dump_chains,
718 };
719 return netlink_dump_start(nlsk, skb, nlh, &c);
720 }
721
722 afi = nf_tables_afinfo_lookup(net, family, false);
723 if (IS_ERR(afi))
724 return PTR_ERR(afi);
725
726 table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
727 if (IS_ERR(table))
728 return PTR_ERR(table);
729
730 chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
731 if (IS_ERR(chain))
732 return PTR_ERR(chain);
733
734 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
735 if (!skb2)
736 return -ENOMEM;
737
738 err = nf_tables_fill_chain_info(skb2, NETLINK_CB(skb).portid,
739 nlh->nlmsg_seq, NFT_MSG_NEWCHAIN, 0,
740 family, table, chain);
741 if (err < 0)
742 goto err;
743
744 return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
745
746err:
747 kfree_skb(skb2);
748 return err;
749}
750
751static int
752nf_tables_chain_policy(struct nft_base_chain *chain, const struct nlattr *attr)
753{
754 switch (ntohl(nla_get_be32(attr))) {
755 case NF_DROP:
756 chain->policy = NF_DROP;
757 break;
758 case NF_ACCEPT:
759 chain->policy = NF_ACCEPT;
760 break;
761 default:
762 return -EINVAL;
763 }
764 return 0;
765}
766
767static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
768 [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
769 [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
770};
771
772static int
773nf_tables_counters(struct nft_base_chain *chain, const struct nlattr *attr)
774{
775 struct nlattr *tb[NFTA_COUNTER_MAX+1];
776 struct nft_stats __percpu *newstats;
777 struct nft_stats *stats;
778 int err;
779
780 err = nla_parse_nested(tb, NFTA_COUNTER_MAX, attr, nft_counter_policy);
781 if (err < 0)
782 return err;
783
784 if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
785 return -EINVAL;
786
787 newstats = alloc_percpu(struct nft_stats);
788 if (newstats == NULL)
789 return -ENOMEM;
790
791	/* Restoring the old counters on this cpu only is fine, since
792	 * per-cpu statistics are never exposed to userspace individually.
793	 */
794 stats = this_cpu_ptr(newstats);
795 stats->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
796 stats->pkts = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
797
798 if (chain->stats) {
799		/* nfnl_lock is held; a dedicated nfnl helper for this swap should be added later */
800 struct nft_stats __percpu *oldstats =
801 rcu_dereference_protected(chain->stats, 1);
802
803 rcu_assign_pointer(chain->stats, newstats);
804 synchronize_rcu();
805 free_percpu(oldstats);
806 } else
807 rcu_assign_pointer(chain->stats, newstats);
808
809 return 0;
810}
811
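/* Create or update a chain.  An existing chain (found by handle or name)
 * may have its policy, counters or name updated in place.  Otherwise a
 * new chain is created: a base chain wired into the stack through
 * nf_register_hook() when NFTA_CHAIN_HOOK is present (unless the table
 * is dormant), or a regular chain otherwise.
 */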
812static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
813 const struct nlmsghdr *nlh,
814 const struct nlattr * const nla[])
815{
816 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
817 const struct nlattr * uninitialized_var(name);
818 const struct nft_af_info *afi;
819 struct nft_table *table;
820 struct nft_chain *chain;
821 struct nft_base_chain *basechain = NULL;
822 struct nlattr *ha[NFTA_HOOK_MAX + 1];
823 struct net *net = sock_net(skb->sk);
824 int family = nfmsg->nfgen_family;
825 u64 handle = 0;
826 int err;
827 bool create;
828
829 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
830
831 afi = nf_tables_afinfo_lookup(net, family, true);
832 if (IS_ERR(afi))
833 return PTR_ERR(afi);
834
835 table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
836 if (IS_ERR(table))
837 return PTR_ERR(table);
838
839 if (table->use == UINT_MAX)
840 return -EOVERFLOW;
841
842 chain = NULL;
843 name = nla[NFTA_CHAIN_NAME];
844
845 if (nla[NFTA_CHAIN_HANDLE]) {
846 handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE]));
847 chain = nf_tables_chain_lookup_byhandle(table, handle);
848 if (IS_ERR(chain))
849 return PTR_ERR(chain);
850 } else {
851 chain = nf_tables_chain_lookup(table, name);
852 if (IS_ERR(chain)) {
853 if (PTR_ERR(chain) != -ENOENT)
854 return PTR_ERR(chain);
855 chain = NULL;
856 }
857 }
858
859 if (chain != NULL) {
860 if (nlh->nlmsg_flags & NLM_F_EXCL)
861 return -EEXIST;
862 if (nlh->nlmsg_flags & NLM_F_REPLACE)
863 return -EOPNOTSUPP;
864
865 if (nla[NFTA_CHAIN_HANDLE] && name &&
866 !IS_ERR(nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME])))
867 return -EEXIST;
868
869 if (nla[NFTA_CHAIN_POLICY]) {
870 if (!(chain->flags & NFT_BASE_CHAIN))
871 return -EOPNOTSUPP;
872
873 err = nf_tables_chain_policy(nft_base_chain(chain),
874 nla[NFTA_CHAIN_POLICY]);
875 if (err < 0)
876 return err;
877 }
878
879 if (nla[NFTA_CHAIN_COUNTERS]) {
880 if (!(chain->flags & NFT_BASE_CHAIN))
881 return -EOPNOTSUPP;
882
883 err = nf_tables_counters(nft_base_chain(chain),
884 nla[NFTA_CHAIN_COUNTERS]);
885 if (err < 0)
886 return err;
887 }
888
889 if (nla[NFTA_CHAIN_HANDLE] && name)
890 nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
891
892 goto notify;
893 }
894
895 if (nla[NFTA_CHAIN_HOOK]) {
896 struct nf_hook_ops *ops;
897 nf_hookfn *hookfn;
898 u32 hooknum;
899 int type = NFT_CHAIN_T_DEFAULT;
900
901 if (nla[NFTA_CHAIN_TYPE]) {
902 type = nf_tables_chain_type_lookup(afi,
903 nla[NFTA_CHAIN_TYPE],
904 create);
905 if (type < 0)
906 return -ENOENT;
907 }
908
909 err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
910 nft_hook_policy);
911 if (err < 0)
912 return err;
913 if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
914 ha[NFTA_HOOK_PRIORITY] == NULL)
915 return -EINVAL;
916
917 hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
918 if (hooknum >= afi->nhooks)
919 return -EINVAL;
920
921 hookfn = chain_type[family][type]->fn[hooknum];
922 if (hookfn == NULL)
923 return -EOPNOTSUPP;
924
925 basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
926 if (basechain == NULL)
927 return -ENOMEM;
928
929 basechain->type = type;
930 chain = &basechain->chain;
931
932 ops = &basechain->ops;
933 ops->pf = family;
934 ops->owner = afi->owner;
935 ops->hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
936 ops->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
937 ops->priv = chain;
938 ops->hook = hookfn;
939 if (afi->hooks[ops->hooknum])
940 ops->hook = afi->hooks[ops->hooknum];
941
942 chain->flags |= NFT_BASE_CHAIN;
943
944 if (nla[NFTA_CHAIN_POLICY]) {
945 err = nf_tables_chain_policy(basechain,
946 nla[NFTA_CHAIN_POLICY]);
947 if (err < 0) {
948 free_percpu(basechain->stats);
949 kfree(basechain);
950 return err;
951 }
952 } else
953 basechain->policy = NF_ACCEPT;
954
955 if (nla[NFTA_CHAIN_COUNTERS]) {
956 err = nf_tables_counters(basechain,
957 nla[NFTA_CHAIN_COUNTERS]);
958 if (err < 0) {
959 free_percpu(basechain->stats);
960 kfree(basechain);
961 return err;
962 }
963 } else {
964 struct nft_stats __percpu *newstats;
965
966 newstats = alloc_percpu(struct nft_stats);
967 if (newstats == NULL)
968 return -ENOMEM;
969
970 rcu_assign_pointer(nft_base_chain(chain)->stats,
971 newstats);
972 }
973 } else {
974 chain = kzalloc(sizeof(*chain), GFP_KERNEL);
975 if (chain == NULL)
976 return -ENOMEM;
977 }
978
979 INIT_LIST_HEAD(&chain->rules);
980 chain->handle = nf_tables_alloc_handle(table);
981 chain->net = net;
982 chain->table = table;
983 nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN);
984
985 if (!(table->flags & NFT_TABLE_F_DORMANT) &&
986 chain->flags & NFT_BASE_CHAIN) {
987 err = nf_register_hook(&nft_base_chain(chain)->ops);
988 if (err < 0) {
989 free_percpu(basechain->stats);
990 kfree(basechain);
991 return err;
992 }
993 }
994 list_add_tail(&chain->list, &table->chains);
995 table->use++;
996notify:
997 nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_NEWCHAIN,
998 family);
999 return 0;
1000}
1001
1002static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
1003{
1004 struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
1005
1006 BUG_ON(chain->use > 0);
1007
1008 if (chain->flags & NFT_BASE_CHAIN) {
1009 free_percpu(nft_base_chain(chain)->stats);
1010 kfree(nft_base_chain(chain));
1011 } else
1012 kfree(chain);
1013}
1014
1015static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
1016 const struct nlmsghdr *nlh,
1017 const struct nlattr * const nla[])
1018{
1019 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1020 const struct nft_af_info *afi;
1021 struct nft_table *table;
1022 struct nft_chain *chain;
1023 struct net *net = sock_net(skb->sk);
1024 int family = nfmsg->nfgen_family;
1025
1026 afi = nf_tables_afinfo_lookup(net, family, false);
1027 if (IS_ERR(afi))
1028 return PTR_ERR(afi);
1029
1030 table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE]);
1031 if (IS_ERR(table))
1032 return PTR_ERR(table);
1033
1034 chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME]);
1035 if (IS_ERR(chain))
1036 return PTR_ERR(chain);
1037
1038 if (!list_empty(&chain->rules))
1039 return -EBUSY;
1040
1041 list_del(&chain->list);
1042 table->use--;
1043
1044 if (!(table->flags & NFT_TABLE_F_DORMANT) &&
1045 chain->flags & NFT_BASE_CHAIN)
1046 nf_unregister_hook(&nft_base_chain(chain)->ops);
1047
1048 nf_tables_chain_notify(skb, nlh, table, chain, NFT_MSG_DELCHAIN,
1049 family);
1050
1051 /* Make sure all rule references are gone before this is released */
1052 call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
1053 return 0;
1054}
1055
1056static void nft_ctx_init(struct nft_ctx *ctx,
1057 const struct sk_buff *skb,
1058 const struct nlmsghdr *nlh,
1059 const struct nft_af_info *afi,
1060 const struct nft_table *table,
1061 const struct nft_chain *chain,
1062 const struct nlattr * const *nla)
1063{
1064 ctx->net = sock_net(skb->sk);
1065 ctx->skb = skb;
1066 ctx->nlh = nlh;
1067 ctx->afi = afi;
1068 ctx->table = table;
1069 ctx->chain = chain;
1070 ctx->nla = nla;
1071}
1072
1073/*
1074 * Expressions
1075 */
1076
1077/**
1078 * nft_register_expr - register nf_tables expr type
1079 * @type: expr type
1080 *
1081 * Registers the expr type for use with nf_tables. Returns zero on
1082 * success or a negative errno code otherwise.
1083 */
1084int nft_register_expr(struct nft_expr_type *type)
1085{
1086 nfnl_lock(NFNL_SUBSYS_NFTABLES);
1087 list_add_tail(&type->list, &nf_tables_expressions);
1088 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1089 return 0;
1090}
1091EXPORT_SYMBOL_GPL(nft_register_expr);
1092
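/* Usage sketch (illustrative only; the "foo" expression, nft_foo_ops,
 * nft_foo_policy and NFTA_FOO_MAX are hypothetical, but the fields match
 * the ones this file dereferences on struct nft_expr_type):
 *
 *	static struct nft_expr_type nft_foo_type __read_mostly = {
 *		.name		= "foo",
 *		.ops		= &nft_foo_ops,
 *		.policy		= nft_foo_policy,
 *		.maxattr	= NFTA_FOO_MAX,
 *		.owner		= THIS_MODULE,
 *	};
 *
 * A module would call nft_register_expr(&nft_foo_type) from its init
 * path and nft_unregister_expr(&nft_foo_type) from its exit path.
 */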
1093/**
1094 * nft_unregister_expr - unregister nf_tables expr type
1095 * @type: expr type
1096 *
1097 * Unregisters the expr type for use with nf_tables.
1098 */
1099void nft_unregister_expr(struct nft_expr_type *type)
1100{
1101 nfnl_lock(NFNL_SUBSYS_NFTABLES);
1102 list_del(&type->list);
1103 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1104}
1105EXPORT_SYMBOL_GPL(nft_unregister_expr);
1106
1107static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla)
1108{
1109 const struct nft_expr_type *type;
1110
1111 list_for_each_entry(type, &nf_tables_expressions, list) {
1112 if (!nla_strcmp(nla, type->name))
1113 return type;
1114 }
1115 return NULL;
1116}
1117
1118static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla)
1119{
1120 const struct nft_expr_type *type;
1121
1122 if (nla == NULL)
1123 return ERR_PTR(-EINVAL);
1124
1125 type = __nft_expr_type_get(nla);
1126 if (type != NULL && try_module_get(type->owner))
1127 return type;
1128
1129#ifdef CONFIG_MODULES
1130 if (type == NULL) {
1131 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1132 request_module("nft-expr-%.*s",
1133 nla_len(nla), (char *)nla_data(nla));
1134 nfnl_lock(NFNL_SUBSYS_NFTABLES);
1135 if (__nft_expr_type_get(nla))
1136 return ERR_PTR(-EAGAIN);
1137 }
1138#endif
1139 return ERR_PTR(-ENOENT);
1140}
1141
1142static const struct nla_policy nft_expr_policy[NFTA_EXPR_MAX + 1] = {
1143 [NFTA_EXPR_NAME] = { .type = NLA_STRING },
1144 [NFTA_EXPR_DATA] = { .type = NLA_NESTED },
1145};
1146
1147static int nf_tables_fill_expr_info(struct sk_buff *skb,
1148 const struct nft_expr *expr)
1149{
1150 if (nla_put_string(skb, NFTA_EXPR_NAME, expr->ops->type->name))
1151 goto nla_put_failure;
1152
1153 if (expr->ops->dump) {
1154 struct nlattr *data = nla_nest_start(skb, NFTA_EXPR_DATA);
1155 if (data == NULL)
1156 goto nla_put_failure;
1157 if (expr->ops->dump(skb, expr) < 0)
1158 goto nla_put_failure;
1159 nla_nest_end(skb, data);
1160 }
1161
1162 return skb->len;
1163
1164nla_put_failure:
1165 return -1;
1166}
1167
1168struct nft_expr_info {
1169 const struct nft_expr_ops *ops;
1170 struct nlattr *tb[NFT_EXPR_MAXATTR + 1];
1171};
1172
1173static int nf_tables_expr_parse(const struct nft_ctx *ctx,
1174 const struct nlattr *nla,
1175 struct nft_expr_info *info)
1176{
1177 const struct nft_expr_type *type;
1178 const struct nft_expr_ops *ops;
1179 struct nlattr *tb[NFTA_EXPR_MAX + 1];
1180 int err;
1181
1182 err = nla_parse_nested(tb, NFTA_EXPR_MAX, nla, nft_expr_policy);
1183 if (err < 0)
1184 return err;
1185
1186 type = nft_expr_type_get(tb[NFTA_EXPR_NAME]);
1187 if (IS_ERR(type))
1188 return PTR_ERR(type);
1189
1190 if (tb[NFTA_EXPR_DATA]) {
1191 err = nla_parse_nested(info->tb, type->maxattr,
1192 tb[NFTA_EXPR_DATA], type->policy);
1193 if (err < 0)
1194 goto err1;
1195 } else
1196 memset(info->tb, 0, sizeof(info->tb[0]) * (type->maxattr + 1));
1197
1198 if (type->select_ops != NULL) {
1199 ops = type->select_ops(ctx,
1200 (const struct nlattr * const *)info->tb);
1201 if (IS_ERR(ops)) {
1202 err = PTR_ERR(ops);
1203 goto err1;
1204 }
1205 } else
1206 ops = type->ops;
1207
1208 info->ops = ops;
1209 return 0;
1210
1211err1:
1212 module_put(type->owner);
1213 return err;
1214}
1215
1216static int nf_tables_newexpr(const struct nft_ctx *ctx,
1217 const struct nft_expr_info *info,
1218 struct nft_expr *expr)
1219{
1220 const struct nft_expr_ops *ops = info->ops;
1221 int err;
1222
1223 expr->ops = ops;
1224 if (ops->init) {
1225 err = ops->init(ctx, expr, (const struct nlattr **)info->tb);
1226 if (err < 0)
1227 goto err1;
1228 }
1229
1230 return 0;
1231
1232err1:
1233 expr->ops = NULL;
1234 return err;
1235}
1236
1237static void nf_tables_expr_destroy(struct nft_expr *expr)
1238{
1239 if (expr->ops->destroy)
1240 expr->ops->destroy(expr);
1241 module_put(expr->ops->type->owner);
1242}
1243
1244/*
1245 * Rules
1246 */
1247
1248static struct nft_rule *__nf_tables_rule_lookup(const struct nft_chain *chain,
1249 u64 handle)
1250{
1251 struct nft_rule *rule;
1252
1253 // FIXME: this sucks
1254 list_for_each_entry(rule, &chain->rules, list) {
1255 if (handle == rule->handle)
1256 return rule;
1257 }
1258
1259 return ERR_PTR(-ENOENT);
1260}
1261
1262static struct nft_rule *nf_tables_rule_lookup(const struct nft_chain *chain,
1263 const struct nlattr *nla)
1264{
1265 if (nla == NULL)
1266 return ERR_PTR(-EINVAL);
1267
1268 return __nf_tables_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
1269}
1270
1271static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
1272 [NFTA_RULE_TABLE] = { .type = NLA_STRING },
1273 [NFTA_RULE_CHAIN] = { .type = NLA_STRING,
1274 .len = NFT_CHAIN_MAXNAMELEN - 1 },
1275 [NFTA_RULE_HANDLE] = { .type = NLA_U64 },
1276 [NFTA_RULE_EXPRESSIONS] = { .type = NLA_NESTED },
1277 [NFTA_RULE_COMPAT] = { .type = NLA_NESTED },
1278 [NFTA_RULE_POSITION] = { .type = NLA_U64 },
1279};
1280
1281static int nf_tables_fill_rule_info(struct sk_buff *skb, u32 portid, u32 seq,
1282 int event, u32 flags, int family,
1283 const struct nft_table *table,
1284 const struct nft_chain *chain,
1285 const struct nft_rule *rule)
1286{
1287 struct nlmsghdr *nlh;
1288 struct nfgenmsg *nfmsg;
1289 const struct nft_expr *expr, *next;
1290 struct nlattr *list;
1291 const struct nft_rule *prule;
1292 int type = event | NFNL_SUBSYS_NFTABLES << 8;
1293
1294 nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg),
1295 flags);
1296 if (nlh == NULL)
1297 goto nla_put_failure;
1298
1299 nfmsg = nlmsg_data(nlh);
1300 nfmsg->nfgen_family = family;
1301 nfmsg->version = NFNETLINK_V0;
1302 nfmsg->res_id = 0;
1303
1304 if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
1305 goto nla_put_failure;
1306 if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
1307 goto nla_put_failure;
1308 if (nla_put_be64(skb, NFTA_RULE_HANDLE, cpu_to_be64(rule->handle)))
1309 goto nla_put_failure;
1310
1311 if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) {
1312 prule = list_entry(rule->list.prev, struct nft_rule, list);
1313 if (nla_put_be64(skb, NFTA_RULE_POSITION,
1314 cpu_to_be64(prule->handle)))
1315 goto nla_put_failure;
1316 }
1317
1318 list = nla_nest_start(skb, NFTA_RULE_EXPRESSIONS);
1319 if (list == NULL)
1320 goto nla_put_failure;
1321 nft_rule_for_each_expr(expr, next, rule) {
1322 struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM);
1323 if (elem == NULL)
1324 goto nla_put_failure;
1325 if (nf_tables_fill_expr_info(skb, expr) < 0)
1326 goto nla_put_failure;
1327 nla_nest_end(skb, elem);
1328 }
1329 nla_nest_end(skb, list);
1330
1331 return nlmsg_end(skb, nlh);
1332
1333nla_put_failure:
1334 nlmsg_trim(skb, nlh);
1335 return -1;
1336}
1337
1338static int nf_tables_rule_notify(const struct sk_buff *oskb,
1339 const struct nlmsghdr *nlh,
1340 const struct nft_table *table,
1341 const struct nft_chain *chain,
1342 const struct nft_rule *rule,
1343 int event, u32 flags, int family)
1344{
1345 struct sk_buff *skb;
1346 u32 portid = NETLINK_CB(oskb).portid;
1347 struct net *net = oskb ? sock_net(oskb->sk) : &init_net;
1348 u32 seq = nlh->nlmsg_seq;
1349 bool report;
1350 int err;
1351
1352 report = nlmsg_report(nlh);
1353 if (!report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
1354 return 0;
1355
1356 err = -ENOBUFS;
1357 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
1358 if (skb == NULL)
1359 goto err;
1360
1361 err = nf_tables_fill_rule_info(skb, portid, seq, event, flags,
1362 family, table, chain, rule);
1363 if (err < 0) {
1364 kfree_skb(skb);
1365 goto err;
1366 }
1367
1368 err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report,
1369 GFP_KERNEL);
1370err:
1371 if (err < 0)
1372 nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
1373 return err;
1374}
1375
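/* Each rule carries a two bit generation mask.  A set bit means the rule
 * is inactive in that generation; net->nft.gencursor selects the current
 * bit.  A freshly added rule has only the current bit set (inactive
 * until the cursor flips at commit time), a rule queued for deletion has
 * only the next bit set (still active until commit), and a cleared mask
 * means the rule is active in both generations.
 */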
1376static inline bool
1377nft_rule_is_active(struct net *net, const struct nft_rule *rule)
1378{
1379 return (rule->genmask & (1 << net->nft.gencursor)) == 0;
1380}
1381
1382static inline int gencursor_next(struct net *net)
1383{
1384 return net->nft.gencursor+1 == 1 ? 1 : 0;
1385}
1386
1387static inline int
1388nft_rule_is_active_next(struct net *net, const struct nft_rule *rule)
1389{
1390 return (rule->genmask & (1 << gencursor_next(net))) == 0;
1391}
1392
1393static inline void
1394nft_rule_activate_next(struct net *net, struct nft_rule *rule)
1395{
1396 /* Now inactive, will be active in the future */
1397 rule->genmask = (1 << net->nft.gencursor);
1398}
1399
1400static inline void
1401nft_rule_disactivate_next(struct net *net, struct nft_rule *rule)
1402{
1403 rule->genmask = (1 << gencursor_next(net));
1404}
1405
1406static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
1407{
1408 rule->genmask = 0;
1409}
1410
1411static int nf_tables_dump_rules(struct sk_buff *skb,
1412 struct netlink_callback *cb)
1413{
1414 const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
1415 const struct nft_af_info *afi;
1416 const struct nft_table *table;
1417 const struct nft_chain *chain;
1418 const struct nft_rule *rule;
1419 unsigned int idx = 0, s_idx = cb->args[0];
1420 struct net *net = sock_net(skb->sk);
1421 int family = nfmsg->nfgen_family;
1422 u8 genctr = ACCESS_ONCE(net->nft.genctr);
1423 u8 gencursor = ACCESS_ONCE(net->nft.gencursor);
1424
1425 list_for_each_entry(afi, &net->nft.af_info, list) {
1426 if (family != NFPROTO_UNSPEC && family != afi->family)
1427 continue;
1428
1429 list_for_each_entry(table, &afi->tables, list) {
1430 list_for_each_entry(chain, &table->chains, list) {
1431 list_for_each_entry(rule, &chain->rules, list) {
1432 if (!nft_rule_is_active(net, rule))
1433 goto cont;
1434 if (idx < s_idx)
1435 goto cont;
1436 if (idx > s_idx)
1437 memset(&cb->args[1], 0,
1438 sizeof(cb->args) - sizeof(cb->args[0]));
1439 if (nf_tables_fill_rule_info(skb, NETLINK_CB(cb->skb).portid,
1440 cb->nlh->nlmsg_seq,
1441 NFT_MSG_NEWRULE,
1442 NLM_F_MULTI | NLM_F_APPEND,
1443 afi->family, table, chain, rule) < 0)
1444 goto done;
1445cont:
1446 idx++;
1447 }
1448 }
1449 }
1450 }
1451done:
1452 /* Invalidate this dump, a transition to the new generation happened */
1453 if (gencursor != net->nft.gencursor || genctr != net->nft.genctr)
1454 return -EBUSY;
1455
1456 cb->args[0] = idx;
1457 return skb->len;
1458}
1459
1460static int nf_tables_getrule(struct sock *nlsk, struct sk_buff *skb,
1461 const struct nlmsghdr *nlh,
1462 const struct nlattr * const nla[])
1463{
1464 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1465 const struct nft_af_info *afi;
1466 const struct nft_table *table;
1467 const struct nft_chain *chain;
1468 const struct nft_rule *rule;
1469 struct sk_buff *skb2;
1470 struct net *net = sock_net(skb->sk);
1471 int family = nfmsg->nfgen_family;
1472 int err;
1473
1474 if (nlh->nlmsg_flags & NLM_F_DUMP) {
1475 struct netlink_dump_control c = {
1476 .dump = nf_tables_dump_rules,
1477 };
1478 return netlink_dump_start(nlsk, skb, nlh, &c);
1479 }
1480
1481 afi = nf_tables_afinfo_lookup(net, family, false);
1482 if (IS_ERR(afi))
1483 return PTR_ERR(afi);
1484
1485 table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
1486 if (IS_ERR(table))
1487 return PTR_ERR(table);
1488
1489 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
1490 if (IS_ERR(chain))
1491 return PTR_ERR(chain);
1492
1493 rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
1494 if (IS_ERR(rule))
1495 return PTR_ERR(rule);
1496
1497 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1498 if (!skb2)
1499 return -ENOMEM;
1500
1501 err = nf_tables_fill_rule_info(skb2, NETLINK_CB(skb).portid,
1502 nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
1503 family, table, chain, rule);
1504 if (err < 0)
1505 goto err;
1506
1507 return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
1508
1509err:
1510 kfree_skb(skb2);
1511 return err;
1512}
1513
1514static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
1515{
1516 struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
1517 struct nft_expr *expr;
1518
1519 /*
1520 * Careful: some expressions might not be initialized in case this
1521 * is called on error from nf_tables_newrule().
1522 */
1523 expr = nft_expr_first(rule);
1524 while (expr->ops && expr != nft_expr_last(rule)) {
1525 nf_tables_expr_destroy(expr);
1526 expr = nft_expr_next(expr);
1527 }
1528 kfree(rule);
1529}
1530
1531static void nf_tables_rule_destroy(struct nft_rule *rule)
1532{
1533 call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
1534}
1535
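/* A rule may carry at most NFT_RULE_MAXEXPRS expressions.  The info
 * array below is scratch space used while parsing one new rule; its
 * allocation happens outside this excerpt (presumably once at module
 * init time).
 */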
1536#define NFT_RULE_MAXEXPRS 128
1537
1538static struct nft_expr_info *info;
1539
1540static struct nft_rule_trans *
1541nf_tables_trans_add(struct nft_rule *rule, const struct nft_ctx *ctx)
1542{
1543 struct nft_rule_trans *rupd;
1544
1545 rupd = kmalloc(sizeof(struct nft_rule_trans), GFP_KERNEL);
1546 if (rupd == NULL)
1547 return NULL;
1548
1549 rupd->chain = ctx->chain;
1550 rupd->table = ctx->table;
1551 rupd->rule = rule;
1552 rupd->family = ctx->afi->family;
1553 rupd->nlh = ctx->nlh;
1554 list_add_tail(&rupd->list, &ctx->net->nft.commit_list);
1555
1556 return rupd;
1557}
1558
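/* Create a rule.  With NLM_F_REPLACE the rule named by NFTA_RULE_HANDLE
 * is queued for removal and the new rule is linked next to it;
 * NFTA_RULE_POSITION inserts relative to an existing rule and
 * NLM_F_APPEND adds at the end of the chain.  The new rule is marked
 * active only for the next generation and is queued on the commit list,
 * to be finalised by nf_tables_commit() or undone by nf_tables_abort().
 */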
1559static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
1560 const struct nlmsghdr *nlh,
1561 const struct nlattr * const nla[])
1562{
1563 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1564 const struct nft_af_info *afi;
1565 struct net *net = sock_net(skb->sk);
1566 struct nft_table *table;
1567 struct nft_chain *chain;
1568 struct nft_rule *rule, *old_rule = NULL;
1569 struct nft_rule_trans *repl = NULL;
1570 struct nft_expr *expr;
1571 struct nft_ctx ctx;
1572 struct nlattr *tmp;
1573 unsigned int size, i, n;
1574 int err, rem;
1575 bool create;
1576 u64 handle, pos_handle;
1577
1578 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
1579
1580 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
1581 if (IS_ERR(afi))
1582 return PTR_ERR(afi);
1583
1584 table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
1585 if (IS_ERR(table))
1586 return PTR_ERR(table);
1587
1588 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
1589 if (IS_ERR(chain))
1590 return PTR_ERR(chain);
1591
1592 if (nla[NFTA_RULE_HANDLE]) {
1593 handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
1594 rule = __nf_tables_rule_lookup(chain, handle);
1595 if (IS_ERR(rule))
1596 return PTR_ERR(rule);
1597
1598 if (nlh->nlmsg_flags & NLM_F_EXCL)
1599 return -EEXIST;
1600 if (nlh->nlmsg_flags & NLM_F_REPLACE)
1601 old_rule = rule;
1602 else
1603 return -EOPNOTSUPP;
1604 } else {
1605 if (!create || nlh->nlmsg_flags & NLM_F_REPLACE)
1606 return -EINVAL;
1607 handle = nf_tables_alloc_handle(table);
1608 }
1609
1610 if (nla[NFTA_RULE_POSITION]) {
1611 if (!(nlh->nlmsg_flags & NLM_F_CREATE))
1612 return -EOPNOTSUPP;
1613
1614 pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
1615 old_rule = __nf_tables_rule_lookup(chain, pos_handle);
1616 if (IS_ERR(old_rule))
1617 return PTR_ERR(old_rule);
1618 }
1619
1620 nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1621
1622 n = 0;
1623 size = 0;
1624 if (nla[NFTA_RULE_EXPRESSIONS]) {
1625 nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) {
1626 err = -EINVAL;
1627 if (nla_type(tmp) != NFTA_LIST_ELEM)
1628 goto err1;
1629 if (n == NFT_RULE_MAXEXPRS)
1630 goto err1;
1631 err = nf_tables_expr_parse(&ctx, tmp, &info[n]);
1632 if (err < 0)
1633 goto err1;
1634 size += info[n].ops->size;
1635 n++;
1636 }
1637 }
1638
1639 err = -ENOMEM;
1640 rule = kzalloc(sizeof(*rule) + size, GFP_KERNEL);
1641 if (rule == NULL)
1642 goto err1;
1643
1644 nft_rule_activate_next(net, rule);
1645
1646 rule->handle = handle;
1647 rule->dlen = size;
1648
1649 expr = nft_expr_first(rule);
1650 for (i = 0; i < n; i++) {
1651 err = nf_tables_newexpr(&ctx, &info[i], expr);
1652 if (err < 0)
1653 goto err2;
1654 info[i].ops = NULL;
1655 expr = nft_expr_next(expr);
1656 }
1657
1658 if (nlh->nlmsg_flags & NLM_F_REPLACE) {
1659 if (nft_rule_is_active_next(net, old_rule)) {
1660 repl = nf_tables_trans_add(old_rule, &ctx);
1661 if (repl == NULL) {
1662 err = -ENOMEM;
1663 goto err2;
1664 }
1665 nft_rule_disactivate_next(net, old_rule);
1666 list_add_tail(&rule->list, &old_rule->list);
1667 } else {
1668 err = -ENOENT;
1669 goto err2;
1670 }
1671 } else if (nlh->nlmsg_flags & NLM_F_APPEND)
1672 if (old_rule)
1673 list_add_rcu(&rule->list, &old_rule->list);
1674 else
1675 list_add_tail_rcu(&rule->list, &chain->rules);
1676 else {
1677 if (old_rule)
1678 list_add_tail_rcu(&rule->list, &old_rule->list);
1679 else
1680 list_add_rcu(&rule->list, &chain->rules);
1681 }
1682
1683 if (nf_tables_trans_add(rule, &ctx) == NULL) {
1684 err = -ENOMEM;
1685 goto err3;
1686 }
1687 return 0;
1688
1689err3:
1690 list_del_rcu(&rule->list);
1691 if (repl) {
1692 list_del_rcu(&repl->rule->list);
1693 list_del(&repl->list);
1694 nft_rule_clear(net, repl->rule);
1695 kfree(repl);
1696 }
1697err2:
1698 nf_tables_rule_destroy(rule);
1699err1:
1700 for (i = 0; i < n; i++) {
1701 if (info[i].ops != NULL)
1702 module_put(info[i].ops->type->owner);
1703 }
1704 return err;
1705}
1706
1707static int
1708nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule)
1709{
1710 /* You cannot delete the same rule twice */
1711 if (nft_rule_is_active_next(ctx->net, rule)) {
1712 if (nf_tables_trans_add(rule, ctx) == NULL)
1713 return -ENOMEM;
1714 nft_rule_disactivate_next(ctx->net, rule);
1715 return 0;
1716 }
1717 return -ENOENT;
1718}
1719
1720static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb,
1721 const struct nlmsghdr *nlh,
1722 const struct nlattr * const nla[])
1723{
1724 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1725 const struct nft_af_info *afi;
1726 struct net *net = sock_net(skb->sk);
1727 const struct nft_table *table;
1728 struct nft_chain *chain;
1729 struct nft_rule *rule, *tmp;
1730 int family = nfmsg->nfgen_family, err = 0;
1731 struct nft_ctx ctx;
1732
1733 afi = nf_tables_afinfo_lookup(net, family, false);
1734 if (IS_ERR(afi))
1735 return PTR_ERR(afi);
1736
1737 table = nf_tables_table_lookup(afi, nla[NFTA_RULE_TABLE]);
1738 if (IS_ERR(table))
1739 return PTR_ERR(table);
1740
1741 chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]);
1742 if (IS_ERR(chain))
1743 return PTR_ERR(chain);
1744
1745 nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla);
1746
1747 if (nla[NFTA_RULE_HANDLE]) {
1748 rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
1749 if (IS_ERR(rule))
1750 return PTR_ERR(rule);
1751
1752 err = nf_tables_delrule_one(&ctx, rule);
1753 } else {
1754 /* Remove all rules in this chain */
1755 list_for_each_entry_safe(rule, tmp, &chain->rules, list) {
1756 err = nf_tables_delrule_one(&ctx, rule);
1757 if (err < 0)
1758 break;
1759 }
1760 }
1761
1762 return err;
1763}
1764
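/* Commit a batch: bump the generation counter (invalidating dumps in
 * progress), flip the generation cursor, wait via synchronize_rcu() for
 * packets still traversing the old generation, then walk the commit
 * list.  Rules that just became active are announced as NFT_MSG_NEWRULE;
 * rules that just became inactive are unlinked, announced as
 * NFT_MSG_DELRULE and freed.  nf_tables_abort() below instead reverts
 * the queued changes.
 */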
1765static int nf_tables_commit(struct sk_buff *skb)
1766{
1767 struct net *net = sock_net(skb->sk);
1768 struct nft_rule_trans *rupd, *tmp;
1769
1770 /* Bump generation counter, invalidate any dump in progress */
1771 net->nft.genctr++;
1772
1773 /* A new generation has just started */
1774 net->nft.gencursor = gencursor_next(net);
1775
1776 /* Make sure all packets have left the previous generation before
1777 * purging old rules.
1778 */
1779 synchronize_rcu();
1780
1781 list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
1782 /* Delete this rule from the dirty list */
1783 list_del(&rupd->list);
1784
1785		/* This rule was inactive in the old generation and has just
1786		 * become active: clear the remaining genmask bit, which after
1787		 * the cursor flip now refers to the future generation.
1788		 */
1789 if (nft_rule_is_active(net, rupd->rule)) {
1790 nft_rule_clear(net, rupd->rule);
1791 nf_tables_rule_notify(skb, rupd->nlh, rupd->table,
1792 rupd->chain, rupd->rule,
1793 NFT_MSG_NEWRULE, 0,
1794 rupd->family);
1795 kfree(rupd);
1796 continue;
1797 }
1798
1799 /* This rule is in the past, get rid of it */
1800 list_del_rcu(&rupd->rule->list);
1801 nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
1802 rupd->rule, NFT_MSG_DELRULE, 0,
1803 rupd->family);
1804 nf_tables_rule_destroy(rupd->rule);
1805 kfree(rupd);
1806 }
1807
1808 return 0;
1809}
1810
1811static int nf_tables_abort(struct sk_buff *skb)
1812{
1813 struct net *net = sock_net(skb->sk);
1814 struct nft_rule_trans *rupd, *tmp;
1815
1816 list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
1817 /* Delete all rules from the dirty list */
1818 list_del(&rupd->list);
1819
1820 if (!nft_rule_is_active_next(net, rupd->rule)) {
1821 nft_rule_clear(net, rupd->rule);
1822 kfree(rupd);
1823 continue;
1824 }
1825
1826 /* This rule is inactive, get rid of it */
1827 list_del_rcu(&rupd->rule->list);
1828 nf_tables_rule_destroy(rupd->rule);
1829 kfree(rupd);
1830 }
1831 return 0;
1832}
1833
1834/*
1835 * Sets
1836 */
1837
1838static LIST_HEAD(nf_tables_set_ops);
1839
1840int nft_register_set(struct nft_set_ops *ops)
1841{
1842 nfnl_lock(NFNL_SUBSYS_NFTABLES);
1843 list_add_tail(&ops->list, &nf_tables_set_ops);
1844 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1845 return 0;
1846}
1847EXPORT_SYMBOL_GPL(nft_register_set);
1848
1849void nft_unregister_set(struct nft_set_ops *ops)
1850{
1851 nfnl_lock(NFNL_SUBSYS_NFTABLES);
1852 list_del(&ops->list);
1853 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1854}
1855EXPORT_SYMBOL_GPL(nft_unregister_set);
1856
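/* Select a set backend.  The requested features come from NFTA_SET_FLAGS
 * (only NFT_SET_INTERVAL and NFT_SET_MAP are considered) and the first
 * registered implementation that supports them all, and whose module
 * reference can be taken, is used.  If no backend is registered yet, the
 * "nft-set" module alias is requested first.
 */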
1857static const struct nft_set_ops *nft_select_set_ops(const struct nlattr * const nla[])
1858{
1859 const struct nft_set_ops *ops;
1860 u32 features;
1861
1862#ifdef CONFIG_MODULES
1863 if (list_empty(&nf_tables_set_ops)) {
1864 nfnl_unlock(NFNL_SUBSYS_NFTABLES);
1865 request_module("nft-set");
1866 nfnl_lock(NFNL_SUBSYS_NFTABLES);
1867 if (!list_empty(&nf_tables_set_ops))
1868 return ERR_PTR(-EAGAIN);
1869 }
1870#endif
1871 features = 0;
1872 if (nla[NFTA_SET_FLAGS] != NULL) {
1873 features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
1874 features &= NFT_SET_INTERVAL | NFT_SET_MAP;
1875 }
1876
1877 // FIXME: implement selection properly
1878 list_for_each_entry(ops, &nf_tables_set_ops, list) {
1879 if ((ops->features & features) != features)
1880 continue;
1881 if (!try_module_get(ops->owner))
1882 continue;
1883 return ops;
1884 }
1885
1886 return ERR_PTR(-EOPNOTSUPP);
1887}
1888
1889static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
1890 [NFTA_SET_TABLE] = { .type = NLA_STRING },
1891 [NFTA_SET_NAME] = { .type = NLA_STRING },
1892 [NFTA_SET_FLAGS] = { .type = NLA_U32 },
1893 [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 },
1894 [NFTA_SET_KEY_LEN] = { .type = NLA_U32 },
1895 [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 },
1896 [NFTA_SET_DATA_LEN] = { .type = NLA_U32 },
1897};
1898
1899static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
1900 const struct sk_buff *skb,
1901 const struct nlmsghdr *nlh,
1902 const struct nlattr * const nla[])
1903{
1904 struct net *net = sock_net(skb->sk);
1905 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
1906 const struct nft_af_info *afi;
1907 const struct nft_table *table = NULL;
1908
1909 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
1910 if (IS_ERR(afi))
1911 return PTR_ERR(afi);
1912
1913 if (nla[NFTA_SET_TABLE] != NULL) {
1914 table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
1915 if (IS_ERR(table))
1916 return PTR_ERR(table);
1917 }
1918
1919 nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
1920 return 0;
1921}
1922
1923struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
1924 const struct nlattr *nla)
1925{
1926 struct nft_set *set;
1927
1928 if (nla == NULL)
1929 return ERR_PTR(-EINVAL);
1930
1931 list_for_each_entry(set, &table->sets, list) {
1932 if (!nla_strcmp(nla, set->name))
1933 return set;
1934 }
1935 return ERR_PTR(-ENOENT);
1936}
1937
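/* Resolve a set name template.  A single "%d" in the requested name is
 * replaced with the lowest number not yet used by the table's sets
 * (tracked in a one page bitmap), so e.g. "map%d" becomes "map0", then
 * "map1", and so on; the resulting name must still be unique within the
 * table.
 */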
1938static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
1939 const char *name)
1940{
1941 const struct nft_set *i;
1942 const char *p;
1943 unsigned long *inuse;
1944 unsigned int n = 0;
1945
1946 p = strnchr(name, IFNAMSIZ, '%');
1947 if (p != NULL) {
1948 if (p[1] != 'd' || strchr(p + 2, '%'))
1949 return -EINVAL;
1950
1951 inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
1952 if (inuse == NULL)
1953 return -ENOMEM;
1954
1955 list_for_each_entry(i, &ctx->table->sets, list) {
1956 if (!sscanf(i->name, name, &n))
1957 continue;
1958 if (n < 0 || n > BITS_PER_LONG * PAGE_SIZE)
1959 continue;
1960 set_bit(n, inuse);
1961 }
1962
1963 n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
1964 free_page((unsigned long)inuse);
1965 }
1966
1967 snprintf(set->name, sizeof(set->name), name, n);
1968 list_for_each_entry(i, &ctx->table->sets, list) {
1969 if (!strcmp(set->name, i->name))
1970 return -ENFILE;
1971 }
1972 return 0;
1973}
1974
1975static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
1976 const struct nft_set *set, u16 event, u16 flags)
1977{
1978 struct nfgenmsg *nfmsg;
1979 struct nlmsghdr *nlh;
1980 u32 portid = NETLINK_CB(ctx->skb).portid;
1981 u32 seq = ctx->nlh->nlmsg_seq;
1982
1983 event |= NFNL_SUBSYS_NFTABLES << 8;
1984 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
1985 flags);
1986 if (nlh == NULL)
1987 goto nla_put_failure;
1988
1989 nfmsg = nlmsg_data(nlh);
1990 nfmsg->nfgen_family = ctx->afi->family;
1991 nfmsg->version = NFNETLINK_V0;
1992 nfmsg->res_id = 0;
1993
1994 if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
1995 goto nla_put_failure;
1996 if (nla_put_string(skb, NFTA_SET_NAME, set->name))
1997 goto nla_put_failure;
1998 if (set->flags != 0)
1999 if (nla_put_be32(skb, NFTA_SET_FLAGS, htonl(set->flags)))
2000 goto nla_put_failure;
2001
2002 if (nla_put_be32(skb, NFTA_SET_KEY_TYPE, htonl(set->ktype)))
2003 goto nla_put_failure;
2004 if (nla_put_be32(skb, NFTA_SET_KEY_LEN, htonl(set->klen)))
2005 goto nla_put_failure;
2006 if (set->flags & NFT_SET_MAP) {
2007 if (nla_put_be32(skb, NFTA_SET_DATA_TYPE, htonl(set->dtype)))
2008 goto nla_put_failure;
2009 if (nla_put_be32(skb, NFTA_SET_DATA_LEN, htonl(set->dlen)))
2010 goto nla_put_failure;
2011 }
2012
2013 return nlmsg_end(skb, nlh);
2014
2015nla_put_failure:
2016 nlmsg_trim(skb, nlh);
2017 return -1;
2018}
2019
2020static int nf_tables_set_notify(const struct nft_ctx *ctx,
2021 const struct nft_set *set,
2022 int event)
2023{
2024 struct sk_buff *skb;
2025 u32 portid = NETLINK_CB(ctx->skb).portid;
2026 bool report;
2027 int err;
2028
2029 report = nlmsg_report(ctx->nlh);
2030 if (!report && !nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
2031 return 0;
2032
2033 err = -ENOBUFS;
2034 skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
2035 if (skb == NULL)
2036 goto err;
2037
2038 err = nf_tables_fill_set(skb, ctx, set, event, 0);
2039 if (err < 0) {
2040 kfree_skb(skb);
2041 goto err;
2042 }
2043
2044 err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, report,
2045 GFP_KERNEL);
2046err:
2047 if (err < 0)
2048 nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
2049 return err;
2050}
2051
2052static int nf_tables_dump_sets_table(struct nft_ctx *ctx, struct sk_buff *skb,
2053 struct netlink_callback *cb)
2054{
2055 const struct nft_set *set;
2056 unsigned int idx = 0, s_idx = cb->args[0];
2057
2058 if (cb->args[1])
2059 return skb->len;
2060
2061 list_for_each_entry(set, &ctx->table->sets, list) {
2062 if (idx < s_idx)
2063 goto cont;
2064 if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
2065 NLM_F_MULTI) < 0) {
2066 cb->args[0] = idx;
2067 goto done;
2068 }
2069cont:
2070 idx++;
2071 }
2072 cb->args[1] = 1;
2073done:
2074 return skb->len;
2075}
2076
2077static int nf_tables_dump_sets_all(struct nft_ctx *ctx, struct sk_buff *skb,
2078 struct netlink_callback *cb)
2079{
2080 const struct nft_set *set;
2081 unsigned int idx = 0, s_idx = cb->args[0];
2082 struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
2083
2084 if (cb->args[1])
2085 return skb->len;
2086
2087 list_for_each_entry(table, &ctx->afi->tables, list) {
2088 if (cur_table && cur_table != table)
2089 continue;
2090
2091 ctx->table = table;
2092 list_for_each_entry(set, &ctx->table->sets, list) {
2093 if (idx < s_idx)
2094 goto cont;
2095 if (nf_tables_fill_set(skb, ctx, set, NFT_MSG_NEWSET,
2096 NLM_F_MULTI) < 0) {
2097 cb->args[0] = idx;
2098 cb->args[2] = (unsigned long) table;
2099 goto done;
2100 }
2101cont:
2102 idx++;
2103 }
2104 }
2105 cb->args[1] = 1;
2106done:
2107 return skb->len;
2108}
2109
2110static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
2111{
2112 const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
2113 struct nlattr *nla[NFTA_SET_MAX + 1];
2114 struct nft_ctx ctx;
2115 int err, ret;
2116
2117 err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_MAX,
2118 nft_set_policy);
2119 if (err < 0)
2120 return err;
2121
2122 err = nft_ctx_init_from_setattr(&ctx, cb->skb, cb->nlh, (void *)nla);
2123 if (err < 0)
2124 return err;
2125
2126 if (ctx.table == NULL)
2127 ret = nf_tables_dump_sets_all(&ctx, skb, cb);
2128 else
2129 ret = nf_tables_dump_sets_table(&ctx, skb, cb);
2130
2131 return ret;
2132}
2133
2134static int nf_tables_getset(struct sock *nlsk, struct sk_buff *skb,
2135 const struct nlmsghdr *nlh,
2136 const struct nlattr * const nla[])
2137{
2138 const struct nft_set *set;
2139 struct nft_ctx ctx;
2140 struct sk_buff *skb2;
2141 int err;
2142
2143	/* Verify existence before starting dump */
2144 err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
2145 if (err < 0)
2146 return err;
2147
2148 if (nlh->nlmsg_flags & NLM_F_DUMP) {
2149 struct netlink_dump_control c = {
2150 .dump = nf_tables_dump_sets,
2151 };
2152 return netlink_dump_start(nlsk, skb, nlh, &c);
2153 }
2154
2155 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
2156 if (IS_ERR(set))
2157 return PTR_ERR(set);
2158
2159 skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2160 if (skb2 == NULL)
2161 return -ENOMEM;
2162
2163 err = nf_tables_fill_set(skb2, &ctx, set, NFT_MSG_NEWSET, 0);
2164 if (err < 0)
2165 goto err;
2166
2167 return nlmsg_unicast(nlsk, skb2, NETLINK_CB(skb).portid);
2168
2169err:
2170 kfree_skb(skb2);
2171 return err;
2172}
2173
2174static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
2175 const struct nlmsghdr *nlh,
2176 const struct nlattr * const nla[])
2177{
2178 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2179 const struct nft_set_ops *ops;
2180 const struct nft_af_info *afi;
2181 struct net *net = sock_net(skb->sk);
2182 struct nft_table *table;
2183 struct nft_set *set;
2184 struct nft_ctx ctx;
2185 char name[IFNAMSIZ];
2186 unsigned int size;
2187 bool create;
2188 u32 ktype, klen, dlen, dtype, flags;
2189 int err;
2190
2191 if (nla[NFTA_SET_TABLE] == NULL ||
2192 nla[NFTA_SET_NAME] == NULL ||
2193 nla[NFTA_SET_KEY_LEN] == NULL)
2194 return -EINVAL;
2195
2196 ktype = NFT_DATA_VALUE;
2197 if (nla[NFTA_SET_KEY_TYPE] != NULL) {
2198 ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
2199 if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
2200 return -EINVAL;
2201 }
2202
2203 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
2204 if (klen == 0 || klen > FIELD_SIZEOF(struct nft_data, data))
2205 return -EINVAL;
2206
2207 flags = 0;
2208 if (nla[NFTA_SET_FLAGS] != NULL) {
2209 flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
2210 if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
2211 NFT_SET_INTERVAL | NFT_SET_MAP))
2212 return -EINVAL;
2213 }
2214
2215 dtype = 0;
2216 dlen = 0;
2217 if (nla[NFTA_SET_DATA_TYPE] != NULL) {
2218 if (!(flags & NFT_SET_MAP))
2219 return -EINVAL;
2220
2221 dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
2222 if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
2223 dtype != NFT_DATA_VERDICT)
2224 return -EINVAL;
2225
2226 if (dtype != NFT_DATA_VERDICT) {
2227 if (nla[NFTA_SET_DATA_LEN] == NULL)
2228 return -EINVAL;
2229 dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
2230 if (dlen == 0 ||
2231 dlen > FIELD_SIZEOF(struct nft_data, data))
2232 return -EINVAL;
2233 } else
2234 dlen = sizeof(struct nft_data);
2235 } else if (flags & NFT_SET_MAP)
2236 return -EINVAL;
2237
2238 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false;
2239
2240 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, create);
2241 if (IS_ERR(afi))
2242 return PTR_ERR(afi);
2243
2244 table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
2245 if (IS_ERR(table))
2246 return PTR_ERR(table);
2247
2248 nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
2249
2250 set = nf_tables_set_lookup(table, nla[NFTA_SET_NAME]);
2251 if (IS_ERR(set)) {
2252 if (PTR_ERR(set) != -ENOENT)
2253 return PTR_ERR(set);
2254 set = NULL;
2255 }
2256
2257 if (set != NULL) {
2258 if (nlh->nlmsg_flags & NLM_F_EXCL)
2259 return -EEXIST;
2260 if (nlh->nlmsg_flags & NLM_F_REPLACE)
2261 return -EOPNOTSUPP;
2262 return 0;
2263 }
2264
2265 if (!(nlh->nlmsg_flags & NLM_F_CREATE))
2266 return -ENOENT;
2267
2268 ops = nft_select_set_ops(nla);
2269 if (IS_ERR(ops))
2270 return PTR_ERR(ops);
2271
2272 size = 0;
2273 if (ops->privsize != NULL)
2274 size = ops->privsize(nla);
2275
2276 err = -ENOMEM;
2277 set = kzalloc(sizeof(*set) + size, GFP_KERNEL);
2278 if (set == NULL)
2279 goto err1;
2280
2281 nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name));
2282 err = nf_tables_set_alloc_name(&ctx, set, name);
2283 if (err < 0)
2284 goto err2;
2285
2286 INIT_LIST_HEAD(&set->bindings);
2287 set->ops = ops;
2288 set->ktype = ktype;
2289 set->klen = klen;
2290 set->dtype = dtype;
2291 set->dlen = dlen;
2292 set->flags = flags;
2293
2294 err = ops->init(set, nla);
2295 if (err < 0)
2296 goto err2;
2297
2298 list_add_tail(&set->list, &table->sets);
2299 nf_tables_set_notify(&ctx, set, NFT_MSG_NEWSET);
2300 return 0;
2301
2302err2:
2303 kfree(set);
2304err1:
2305 module_put(ops->owner);
2306 return err;
2307}
2308
2309static void nf_tables_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
2310{
2311 list_del(&set->list);
2312 if (!(set->flags & NFT_SET_ANONYMOUS))
2313 nf_tables_set_notify(ctx, set, NFT_MSG_DELSET);
2314
2315 set->ops->destroy(set);
2316 module_put(set->ops->owner);
2317 kfree(set);
2318}
2319
2320static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
2321 const struct nlmsghdr *nlh,
2322 const struct nlattr * const nla[])
2323{
2324 struct nft_set *set;
2325 struct nft_ctx ctx;
2326 int err;
2327
2328 if (nla[NFTA_SET_TABLE] == NULL)
2329 return -EINVAL;
2330
2331 err = nft_ctx_init_from_setattr(&ctx, skb, nlh, nla);
2332 if (err < 0)
2333 return err;
2334
2335 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
2336 if (IS_ERR(set))
2337 return PTR_ERR(set);
2338 if (!list_empty(&set->bindings))
2339 return -EBUSY;
2340
2341 nf_tables_set_destroy(&ctx, set);
2342 return 0;
2343}
2344
2345static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
2346 const struct nft_set *set,
2347 const struct nft_set_iter *iter,
2348 const struct nft_set_elem *elem)
2349{
2350 enum nft_registers dreg;
2351
2352 dreg = nft_type_to_reg(set->dtype);
2353 return nft_validate_data_load(ctx, dreg, &elem->data, set->dtype);
2354}
2355
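/* Bind a set to an expression in a chain.  Anonymous sets accept only a
 * single binding.  For maps, binding to a chain the set is not yet bound
 * to walks all elements so that data loads (e.g. jump verdicts) are
 * validated in the context of that chain; if validation fails, anonymous
 * sets are destroyed.
 */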
2356int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
2357 struct nft_set_binding *binding)
2358{
2359 struct nft_set_binding *i;
2360 struct nft_set_iter iter;
2361
2362 if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
2363 return -EBUSY;
2364
2365 if (set->flags & NFT_SET_MAP) {
2366		/* If the set is already bound to the same chain, all
2367		 * jumps have already been validated for that chain.
2368		 */
2369 list_for_each_entry(i, &set->bindings, list) {
2370 if (i->chain == binding->chain)
2371 goto bind;
2372 }
2373
2374 iter.skip = 0;
2375 iter.count = 0;
2376 iter.err = 0;
2377 iter.fn = nf_tables_bind_check_setelem;
2378
2379 set->ops->walk(ctx, set, &iter);
2380 if (iter.err < 0) {
2381 /* Destroy anonymous sets if binding fails */
2382 if (set->flags & NFT_SET_ANONYMOUS)
2383 nf_tables_set_destroy(ctx, set);
2384
2385 return iter.err;
2386 }
2387 }
2388bind:
2389 binding->chain = ctx->chain;
2390 list_add_tail(&binding->list, &set->bindings);
2391 return 0;
2392}
2393
2394void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
2395 struct nft_set_binding *binding)
2396{
2397 list_del(&binding->list);
2398
2399 if (list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
2400 nf_tables_set_destroy(ctx, set);
2401}
2402
2403/*
2404 * Set elements
2405 */
2406
2407static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
2408 [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED },
2409 [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
2410 [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
2411};
2412
2413static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
2414 [NFTA_SET_ELEM_LIST_TABLE] = { .type = NLA_STRING },
2415 [NFTA_SET_ELEM_LIST_SET] = { .type = NLA_STRING },
2416 [NFTA_SET_ELEM_LIST_ELEMENTS] = { .type = NLA_NESTED },
2417};
2418
2419static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx,
2420 const struct sk_buff *skb,
2421 const struct nlmsghdr *nlh,
2422 const struct nlattr * const nla[])
2423{
2424 const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
2425 const struct nft_af_info *afi;
2426 const struct nft_table *table;
2427 struct net *net = sock_net(skb->sk);
2428
2429 afi = nf_tables_afinfo_lookup(net, nfmsg->nfgen_family, false);
2430 if (IS_ERR(afi))
2431 return PTR_ERR(afi);
2432
2433 table = nf_tables_table_lookup(afi, nla[NFTA_SET_ELEM_LIST_TABLE]);
2434 if (IS_ERR(table))
2435 return PTR_ERR(table);
2436
2437 nft_ctx_init(ctx, skb, nlh, afi, table, NULL, nla);
2438 return 0;
2439}
2440
2441static int nf_tables_fill_setelem(struct sk_buff *skb,
2442 const struct nft_set *set,
2443 const struct nft_set_elem *elem)
2444{
2445 unsigned char *b = skb_tail_pointer(skb);
2446 struct nlattr *nest;
2447
2448 nest = nla_nest_start(skb, NFTA_LIST_ELEM);
2449 if (nest == NULL)
2450 goto nla_put_failure;
2451
2452 if (nft_data_dump(skb, NFTA_SET_ELEM_KEY, &elem->key, NFT_DATA_VALUE,
2453 set->klen) < 0)
2454 goto nla_put_failure;
2455
2456 if (set->flags & NFT_SET_MAP &&
2457 !(elem->flags & NFT_SET_ELEM_INTERVAL_END) &&
2458 nft_data_dump(skb, NFTA_SET_ELEM_DATA, &elem->data,
2459 set->dtype == NFT_DATA_VERDICT ? NFT_DATA_VERDICT : NFT_DATA_VALUE,
2460 set->dlen) < 0)
2461 goto nla_put_failure;
2462
2463 if (elem->flags != 0)
2464 if (nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(elem->flags)))
2465 goto nla_put_failure;
2466
2467 nla_nest_end(skb, nest);
2468 return 0;
2469
2470nla_put_failure:
2471 nlmsg_trim(skb, b);
2472 return -EMSGSIZE;
2473}
2474
2475struct nft_set_dump_args {
2476 const struct netlink_callback *cb;
2477 struct nft_set_iter iter;
2478 struct sk_buff *skb;
2479};
2480
2481static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
2482 const struct nft_set *set,
2483 const struct nft_set_iter *iter,
2484 const struct nft_set_elem *elem)
2485{
2486 struct nft_set_dump_args *args;
2487
2488 args = container_of(iter, struct nft_set_dump_args, iter);
2489 return nf_tables_fill_setelem(args->skb, set, elem);
2490}
2491
2492static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
2493{
2494 const struct nft_set *set;
2495 struct nft_set_dump_args args;
2496 struct nft_ctx ctx;
2497 struct nlattr *nla[NFTA_SET_ELEM_LIST_MAX + 1];
2498 struct nfgenmsg *nfmsg;
2499 struct nlmsghdr *nlh;
2500 struct nlattr *nest;
2501 u32 portid, seq;
2502 int event, err;
2503
2504 nfmsg = nlmsg_data(cb->nlh);
2505 err = nlmsg_parse(cb->nlh, sizeof(*nfmsg), nla, NFTA_SET_ELEM_LIST_MAX,
2506 nft_set_elem_list_policy);
2507 if (err < 0)
2508 return err;
2509
2510 err = nft_ctx_init_from_elemattr(&ctx, cb->skb, cb->nlh, (void *)nla);
2511 if (err < 0)
2512 return err;
2513
2514 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
2515 if (IS_ERR(set))
2516 return PTR_ERR(set);
2517
2518 event = NFT_MSG_NEWSETELEM;
2519 event |= NFNL_SUBSYS_NFTABLES << 8;
2520 portid = NETLINK_CB(cb->skb).portid;
2521 seq = cb->nlh->nlmsg_seq;
2522
2523 nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
2524 NLM_F_MULTI);
2525 if (nlh == NULL)
2526 goto nla_put_failure;
2527
2528 nfmsg = nlmsg_data(nlh);
2529 nfmsg->nfgen_family = NFPROTO_UNSPEC;
2530 nfmsg->version = NFNETLINK_V0;
2531 nfmsg->res_id = 0;
2532
2533 if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, ctx.table->name))
2534 goto nla_put_failure;
2535 if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
2536 goto nla_put_failure;
2537
2538 nest = nla_nest_start(skb, NFTA_SET_ELEM_LIST_ELEMENTS);
2539 if (nest == NULL)
2540 goto nla_put_failure;
2541
2542 args.cb = cb;
2543 args.skb = skb;
2544 args.iter.skip = cb->args[0];
2545 args.iter.count = 0;
2546 args.iter.err = 0;
2547 args.iter.fn = nf_tables_dump_setelem;
2548 set->ops->walk(&ctx, set, &args.iter);
2549
2550 nla_nest_end(skb, nest);
2551 nlmsg_end(skb, nlh);
2552
2553 if (args.iter.err && args.iter.err != -EMSGSIZE)
2554 return args.iter.err;
2555 if (args.iter.count == cb->args[0])
2556 return 0;
2557
2558 cb->args[0] = args.iter.count;
2559 return skb->len;
2560
2561nla_put_failure:
2562 return -ENOSPC;
2563}
2564
2565static int nf_tables_getsetelem(struct sock *nlsk, struct sk_buff *skb,
2566 const struct nlmsghdr *nlh,
2567 const struct nlattr * const nla[])
2568{
2569 const struct nft_set *set;
2570 struct nft_ctx ctx;
2571 int err;
2572
2573 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
2574 if (err < 0)
2575 return err;
2576
2577 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
2578 if (IS_ERR(set))
2579 return PTR_ERR(set);
2580
2581 if (nlh->nlmsg_flags & NLM_F_DUMP) {
2582 struct netlink_dump_control c = {
2583 .dump = nf_tables_dump_set,
2584 };
2585 return netlink_dump_start(nlsk, skb, nlh, &c);
2586 }
2587 return -EOPNOTSUPP;
2588}
2589
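/* Parse and insert a single set element.  The key must be a value of the
 * set's key length; data is only accepted for maps (and required there
 * unless the element is an interval-end marker), is length-checked for
 * non-verdict data types, and is validated against every chain the set
 * is currently bound to before the backend's ->insert() is called.
 */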
2590static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
2591 const struct nlattr *attr)
2592{
2593 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
2594 struct nft_data_desc d1, d2;
2595 struct nft_set_elem elem;
2596 struct nft_set_binding *binding;
2597 enum nft_registers dreg;
2598 int err;
2599
2600 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
2601 nft_set_elem_policy);
2602 if (err < 0)
2603 return err;
2604
2605 if (nla[NFTA_SET_ELEM_KEY] == NULL)
2606 return -EINVAL;
2607
2608 elem.flags = 0;
2609 if (nla[NFTA_SET_ELEM_FLAGS] != NULL) {
2610 elem.flags = ntohl(nla_get_be32(nla[NFTA_SET_ELEM_FLAGS]));
2611 if (elem.flags & ~NFT_SET_ELEM_INTERVAL_END)
2612 return -EINVAL;
2613 }
2614
2615 if (set->flags & NFT_SET_MAP) {
2616 if (nla[NFTA_SET_ELEM_DATA] == NULL &&
2617 !(elem.flags & NFT_SET_ELEM_INTERVAL_END))
2618 return -EINVAL;
2619 } else {
2620 if (nla[NFTA_SET_ELEM_DATA] != NULL)
2621 return -EINVAL;
2622 }
2623
2624 err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
2625 if (err < 0)
2626 goto err1;
2627 err = -EINVAL;
2628 if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
2629 goto err2;
2630
2631 err = -EEXIST;
2632 if (set->ops->get(set, &elem) == 0)
2633 goto err2;
2634
2635 if (nla[NFTA_SET_ELEM_DATA] != NULL) {
2636 err = nft_data_init(ctx, &elem.data, &d2, nla[NFTA_SET_ELEM_DATA]);
2637 if (err < 0)
2638 goto err2;
2639
2640 err = -EINVAL;
2641 if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen)
2642 goto err3;
2643
2644 dreg = nft_type_to_reg(set->dtype);
2645 list_for_each_entry(binding, &set->bindings, list) {
2646 struct nft_ctx bind_ctx = {
2647 .afi = ctx->afi,
2648 .table = ctx->table,
2649 .chain = binding->chain,
2650 };
2651
2652 err = nft_validate_data_load(&bind_ctx, dreg,
2653 &elem.data, d2.type);
2654 if (err < 0)
2655 goto err3;
2656 }
2657 }
2658
2659 err = set->ops->insert(set, &elem);
2660 if (err < 0)
2661 goto err3;
2662
2663 return 0;
2664
2665err3:
2666 if (nla[NFTA_SET_ELEM_DATA] != NULL)
2667 nft_data_uninit(&elem.data, d2.type);
2668err2:
2669 nft_data_uninit(&elem.key, d1.type);
2670err1:
2671 return err;
2672}
2673
2674static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
2675 const struct nlmsghdr *nlh,
2676 const struct nlattr * const nla[])
2677{
2678 const struct nlattr *attr;
2679 struct nft_set *set;
2680 struct nft_ctx ctx;
2681 int rem, err;
2682
2683 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
2684 if (err < 0)
2685 return err;
2686
2687 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
2688 if (IS_ERR(set))
2689 return PTR_ERR(set);
2690 if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
2691 return -EBUSY;
2692
2693 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
2694 err = nft_add_set_elem(&ctx, set, attr);
2695 if (err < 0)
2696 return err;
2697 }
2698 return 0;
2699}
2700
2701static int nft_del_setelem(const struct nft_ctx *ctx, struct nft_set *set,
2702 const struct nlattr *attr)
2703{
2704 struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
2705 struct nft_data_desc desc;
2706 struct nft_set_elem elem;
2707 int err;
2708
2709 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
2710 nft_set_elem_policy);
2711 if (err < 0)
2712 goto err1;
2713
2714 err = -EINVAL;
2715 if (nla[NFTA_SET_ELEM_KEY] == NULL)
2716 goto err1;
2717
2718 err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]);
2719 if (err < 0)
2720 goto err1;
2721
2722 err = -EINVAL;
2723 if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
2724 goto err2;
2725
2726 err = set->ops->get(set, &elem);
2727 if (err < 0)
2728 goto err2;
2729
2730 set->ops->remove(set, &elem);
2731
2732 nft_data_uninit(&elem.key, NFT_DATA_VALUE);
2733 if (set->flags & NFT_SET_MAP)
2734 nft_data_uninit(&elem.data, set->dtype);
2735
2736err2:
2737 nft_data_uninit(&elem.key, desc.type);
2738err1:
2739 return err;
2740}
2741
2742static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
2743 const struct nlmsghdr *nlh,
2744 const struct nlattr * const nla[])
2745{
2746 const struct nlattr *attr;
2747 struct nft_set *set;
2748 struct nft_ctx ctx;
2749 int rem, err;
2750
2751 err = nft_ctx_init_from_elemattr(&ctx, skb, nlh, nla);
2752 if (err < 0)
2753 return err;
2754
2755 set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET]);
2756 if (IS_ERR(set))
2757 return PTR_ERR(set);
2758 if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
2759 return -EBUSY;
2760
2761 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
2762 err = nft_del_setelem(&ctx, set, attr);
2763 if (err < 0)
2764 return err;
2765 }
2766 return 0;
2767}
2768
2769static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
2770 [NFT_MSG_NEWTABLE] = {
2771 .call = nf_tables_newtable,
2772 .attr_count = NFTA_TABLE_MAX,
2773 .policy = nft_table_policy,
2774 },
2775 [NFT_MSG_GETTABLE] = {
2776 .call = nf_tables_gettable,
2777 .attr_count = NFTA_TABLE_MAX,
2778 .policy = nft_table_policy,
2779 },
2780 [NFT_MSG_DELTABLE] = {
2781 .call = nf_tables_deltable,
2782 .attr_count = NFTA_TABLE_MAX,
2783 .policy = nft_table_policy,
2784 },
2785 [NFT_MSG_NEWCHAIN] = {
2786 .call = nf_tables_newchain,
2787 .attr_count = NFTA_CHAIN_MAX,
2788 .policy = nft_chain_policy,
2789 },
2790 [NFT_MSG_GETCHAIN] = {
2791 .call = nf_tables_getchain,
2792 .attr_count = NFTA_CHAIN_MAX,
2793 .policy = nft_chain_policy,
2794 },
2795 [NFT_MSG_DELCHAIN] = {
2796 .call = nf_tables_delchain,
2797 .attr_count = NFTA_CHAIN_MAX,
2798 .policy = nft_chain_policy,
2799 },
2800 [NFT_MSG_NEWRULE] = {
2801 .call_batch = nf_tables_newrule,
2802 .attr_count = NFTA_RULE_MAX,
2803 .policy = nft_rule_policy,
2804 },
2805 [NFT_MSG_GETRULE] = {
2806 .call = nf_tables_getrule,
2807 .attr_count = NFTA_RULE_MAX,
2808 .policy = nft_rule_policy,
2809 },
2810 [NFT_MSG_DELRULE] = {
2811 .call_batch = nf_tables_delrule,
2812 .attr_count = NFTA_RULE_MAX,
2813 .policy = nft_rule_policy,
2814 },
2815 [NFT_MSG_NEWSET] = {
2816 .call = nf_tables_newset,
2817 .attr_count = NFTA_SET_MAX,
2818 .policy = nft_set_policy,
2819 },
2820 [NFT_MSG_GETSET] = {
2821 .call = nf_tables_getset,
2822 .attr_count = NFTA_SET_MAX,
2823 .policy = nft_set_policy,
2824 },
2825 [NFT_MSG_DELSET] = {
2826 .call = nf_tables_delset,
2827 .attr_count = NFTA_SET_MAX,
2828 .policy = nft_set_policy,
2829 },
2830 [NFT_MSG_NEWSETELEM] = {
2831 .call = nf_tables_newsetelem,
2832 .attr_count = NFTA_SET_ELEM_LIST_MAX,
2833 .policy = nft_set_elem_list_policy,
2834 },
2835 [NFT_MSG_GETSETELEM] = {
2836 .call = nf_tables_getsetelem,
2837 .attr_count = NFTA_SET_ELEM_LIST_MAX,
2838 .policy = nft_set_elem_list_policy,
2839 },
2840 [NFT_MSG_DELSETELEM] = {
2841 .call = nf_tables_delsetelem,
2842 .attr_count = NFTA_SET_ELEM_LIST_MAX,
2843 .policy = nft_set_elem_list_policy,
2844 },
2845};
2846
2847static const struct nfnetlink_subsystem nf_tables_subsys = {
2848 .name = "nf_tables",
2849 .subsys_id = NFNL_SUBSYS_NFTABLES,
2850 .cb_count = NFT_MSG_MAX,
2851 .cb = nf_tables_cb,
2852 .commit = nf_tables_commit,
2853 .abort = nf_tables_abort,
2854};
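/*
 * Note the split between .call and .call_batch above: rule additions and
 * deletions only provide .call_batch, so they are reachable exclusively
 * through the nfnetlink batch receive path (added to nfnetlink.c later in
 * this patch), which finishes by invoking the .commit or .abort callbacks
 * registered here.
 */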
2855
2856/*
2857 * Loop detection - walk through the ruleset beginning at the destination chain
2858 * of a new jump until either the source chain is reached (loop) or all
2859 * reachable chains have been traversed.
2860 *
2861 * The loop check is performed whenever a new jump verdict is added to an
2862 * expression or verdict map, or when a verdict map is bound to a new chain.
2863 */
2864
2865static int nf_tables_check_loops(const struct nft_ctx *ctx,
2866 const struct nft_chain *chain);
2867
2868static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
2869 const struct nft_set *set,
2870 const struct nft_set_iter *iter,
2871 const struct nft_set_elem *elem)
2872{
2873 switch (elem->data.verdict) {
2874 case NFT_JUMP:
2875 case NFT_GOTO:
2876 return nf_tables_check_loops(ctx, elem->data.chain);
2877 default:
2878 return 0;
2879 }
2880}
2881
2882static int nf_tables_check_loops(const struct nft_ctx *ctx,
2883 const struct nft_chain *chain)
2884{
2885 const struct nft_rule *rule;
2886 const struct nft_expr *expr, *last;
2887 const struct nft_set *set;
2888 struct nft_set_binding *binding;
2889 struct nft_set_iter iter;
2890
2891 if (ctx->chain == chain)
2892 return -ELOOP;
2893
2894 list_for_each_entry(rule, &chain->rules, list) {
2895 nft_rule_for_each_expr(expr, last, rule) {
2896 const struct nft_data *data = NULL;
2897 int err;
2898
2899 if (!expr->ops->validate)
2900 continue;
2901
2902 err = expr->ops->validate(ctx, expr, &data);
2903 if (err < 0)
2904 return err;
2905
2906 if (data == NULL)
2907 continue;
2908
2909 switch (data->verdict) {
2910 case NFT_JUMP:
2911 case NFT_GOTO:
2912 err = nf_tables_check_loops(ctx, data->chain);
2913 if (err < 0)
2914 return err;
2915 default:
2916 break;
2917 }
2918 }
2919 }
2920
2921 list_for_each_entry(set, &ctx->table->sets, list) {
2922 if (!(set->flags & NFT_SET_MAP) ||
2923 set->dtype != NFT_DATA_VERDICT)
2924 continue;
2925
2926 list_for_each_entry(binding, &set->bindings, list) {
2927 if (binding->chain != chain)
2928 continue;
2929
2930 iter.skip = 0;
2931 iter.count = 0;
2932 iter.err = 0;
2933 iter.fn = nf_tables_loop_check_setelem;
2934
2935 set->ops->walk(ctx, set, &iter);
2936 if (iter.err < 0)
2937 return iter.err;
2938 }
2939 }
2940
2941 return 0;
2942}
2943
2944/**
2945 * nft_validate_input_register - validate an expression's input register
2946 *
2947 * @reg: the register number
2948 *
2949 * Validate that the input register is one of the general purpose
2950 * registers.
2951 */
2952int nft_validate_input_register(enum nft_registers reg)
2953{
2954 if (reg <= NFT_REG_VERDICT)
2955 return -EINVAL;
2956 if (reg > NFT_REG_MAX)
2957 return -ERANGE;
2958 return 0;
2959}
2960EXPORT_SYMBOL_GPL(nft_validate_input_register);
2961
2962/**
2963 * nft_validate_output_register - validate an expression's output register
2964 *
2965 * @reg: the register number
2966 *
2967 * Validate that the output register is one of the general purpose
2968 * registers or the verdict register.
2969 */
2970int nft_validate_output_register(enum nft_registers reg)
2971{
2972 if (reg < NFT_REG_VERDICT)
2973 return -EINVAL;
2974 if (reg > NFT_REG_MAX)
2975 return -ERANGE;
2976 return 0;
2977}
2978EXPORT_SYMBOL_GPL(nft_validate_output_register);
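/*
 * Both validators rely on the register numbering from the uapi header in
 * this series, where the verdict register is 0 and the general purpose
 * registers follow it:
 *
 *	enum nft_registers {
 *		NFT_REG_VERDICT,
 *		NFT_REG_1,
 *		NFT_REG_2,
 *		NFT_REG_3,
 *		NFT_REG_4,
 *		__NFT_REG_MAX
 *	};
 *
 * Hence an input register must be strictly greater than NFT_REG_VERDICT,
 * while an output register may also be the verdict register itself.
 */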
2979
2980/**
2981 * nft_validate_data_load - validate an expression's data load
2982 *
2983 * @ctx: context of the expression performing the load
2984 * @reg: the destination register number
2985 * @data: the data to load
2986 * @type: the data type
2987 *
2988 * Validate that a data load uses the appropriate data type for
2989 * the destination register. A value of NULL for the data means
2990 * that it is runtime-gathered data, which is always of type
2991 * NFT_DATA_VALUE.
2992 */
2993int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
2994 const struct nft_data *data,
2995 enum nft_data_types type)
2996{
2997 int err;
2998
2999 switch (reg) {
3000 case NFT_REG_VERDICT:
3001 if (data == NULL || type != NFT_DATA_VERDICT)
3002 return -EINVAL;
3003
3004 if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) {
3005 err = nf_tables_check_loops(ctx, data->chain);
3006 if (err < 0)
3007 return err;
3008
3009 if (ctx->chain->level + 1 > data->chain->level) {
3010 if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
3011 return -EMLINK;
3012 data->chain->level = ctx->chain->level + 1;
3013 }
3014 }
3015
3016 return 0;
3017 default:
3018 if (data != NULL && type != NFT_DATA_VALUE)
3019 return -EINVAL;
3020 return 0;
3021 }
3022}
3023EXPORT_SYMBOL_GPL(nft_validate_data_load);
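/*
 * A rough sketch of how an expression ties the three validators together
 * when it loads a constant into a register; the attribute names follow the
 * immediate expression added elsewhere in this series, but the exact code
 * is not part of this file:
 *
 *	priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
 *	err = nft_validate_output_register(priv->dreg);
 *	if (err < 0)
 *		return err;
 *
 *	err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
 *	if (err < 0)
 *		return err;
 *
 *	err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
 *
 * Loading a jump or goto verdict this way is what triggers the loop check
 * and the chain level bookkeeping above.
 */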
3024
3025static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
3026 [NFTA_VERDICT_CODE] = { .type = NLA_U32 },
3027 [NFTA_VERDICT_CHAIN] = { .type = NLA_STRING,
3028 .len = NFT_CHAIN_MAXNAMELEN - 1 },
3029};
3030
3031static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
3032 struct nft_data_desc *desc, const struct nlattr *nla)
3033{
3034 struct nlattr *tb[NFTA_VERDICT_MAX + 1];
3035 struct nft_chain *chain;
3036 int err;
3037
3038 err = nla_parse_nested(tb, NFTA_VERDICT_MAX, nla, nft_verdict_policy);
3039 if (err < 0)
3040 return err;
3041
3042 if (!tb[NFTA_VERDICT_CODE])
3043 return -EINVAL;
3044 data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
3045
3046 switch (data->verdict) {
3047 case NF_ACCEPT:
3048 case NF_DROP:
3049 case NF_QUEUE:
3050 case NFT_CONTINUE:
3051 case NFT_BREAK:
3052 case NFT_RETURN:
3053 desc->len = sizeof(data->verdict);
3054 break;
3055 case NFT_JUMP:
3056 case NFT_GOTO:
3057 if (!tb[NFTA_VERDICT_CHAIN])
3058 return -EINVAL;
3059 chain = nf_tables_chain_lookup(ctx->table,
3060 tb[NFTA_VERDICT_CHAIN]);
3061 if (IS_ERR(chain))
3062 return PTR_ERR(chain);
3063 if (chain->flags & NFT_BASE_CHAIN)
3064 return -EOPNOTSUPP;
3065
3066 chain->use++;
3067 data->chain = chain;
3068 desc->len = sizeof(data);
3069 break;
3070 default:
3071 return -EINVAL;
3072 }
3073
3074 desc->type = NFT_DATA_VERDICT;
3075 return 0;
3076}
3077
3078static void nft_verdict_uninit(const struct nft_data *data)
3079{
3080 switch (data->verdict) {
3081 case NFT_JUMP:
3082 case NFT_GOTO:
3083 data->chain->use--;
3084 break;
3085 }
3086}
3087
3088static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
3089{
3090 struct nlattr *nest;
3091
3092 nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
3093 if (!nest)
3094 goto nla_put_failure;
3095
3096 if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict)))
3097 goto nla_put_failure;
3098
3099 switch (data->verdict) {
3100 case NFT_JUMP:
3101 case NFT_GOTO:
3102 if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name))
3103 goto nla_put_failure;
3104 }
3105 nla_nest_end(skb, nest);
3106 return 0;
3107
3108nla_put_failure:
3109 return -1;
3110}
3111
3112static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data,
3113 struct nft_data_desc *desc, const struct nlattr *nla)
3114{
3115 unsigned int len;
3116
3117 len = nla_len(nla);
3118 if (len == 0)
3119 return -EINVAL;
3120 if (len > sizeof(data->data))
3121 return -EOVERFLOW;
3122
3123 nla_memcpy(data->data, nla, sizeof(data->data));
3124 desc->type = NFT_DATA_VALUE;
3125 desc->len = len;
3126 return 0;
3127}
3128
3129static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data,
3130 unsigned int len)
3131{
3132 return nla_put(skb, NFTA_DATA_VALUE, len, data->data);
3133}
3134
3135static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = {
3136 [NFTA_DATA_VALUE] = { .type = NLA_BINARY,
3137 .len = FIELD_SIZEOF(struct nft_data, data) },
3138 [NFTA_DATA_VERDICT] = { .type = NLA_NESTED },
3139};
3140
3141/**
3142 * nft_data_init - parse nf_tables data netlink attributes
3143 *
3144 * @ctx: context of the expression using the data
3145 * @data: destination struct nft_data
3146 * @desc: data description
3147 * @nla: netlink attribute containing data
3148 *
3149 * Parse the netlink data attributes and initialize a struct nft_data.
3150 * The type and length of data are returned in the data description.
3151 *
3152 * The caller can indicate that it only wants to accept data of type
3153 * NFT_DATA_VALUE by passing NULL for the ctx argument.
3154 */
3155int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data,
3156 struct nft_data_desc *desc, const struct nlattr *nla)
3157{
3158 struct nlattr *tb[NFTA_DATA_MAX + 1];
3159 int err;
3160
3161 err = nla_parse_nested(tb, NFTA_DATA_MAX, nla, nft_data_policy);
3162 if (err < 0)
3163 return err;
3164
3165 if (tb[NFTA_DATA_VALUE])
3166 return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]);
3167 if (tb[NFTA_DATA_VERDICT] && ctx != NULL)
3168 return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]);
3169 return -EINVAL;
3170}
3171EXPORT_SYMBOL_GPL(nft_data_init);
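/*
 * Both call patterns show up in this patch: expressions that only accept
 * constant values pass a NULL ctx (see nft_cmp_init() and nft_bitwise_init()
 * below), while verdict-capable callers such as the set element and verdict
 * map code above pass the real ctx so that NFTA_DATA_VERDICT is accepted.
 */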
3172
3173/**
3174 * nft_data_uninit - release a nft_data item
3175 *
3176 * @data: struct nft_data to release
3177 * @type: type of data
3178 *
3179 * Release an nft_data item. NFT_DATA_VALUE types can be silently discarded;
3180 * all others need to be released by calling this function.
3181 */
3182void nft_data_uninit(const struct nft_data *data, enum nft_data_types type)
3183{
3184 switch (type) {
3185 case NFT_DATA_VALUE:
3186 return;
3187 case NFT_DATA_VERDICT:
3188 return nft_verdict_uninit(data);
3189 default:
3190 WARN_ON(1);
3191 }
3192}
3193EXPORT_SYMBOL_GPL(nft_data_uninit);
3194
3195int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
3196 enum nft_data_types type, unsigned int len)
3197{
3198 struct nlattr *nest;
3199 int err;
3200
3201 nest = nla_nest_start(skb, attr);
3202 if (nest == NULL)
3203 return -1;
3204
3205 switch (type) {
3206 case NFT_DATA_VALUE:
3207 err = nft_value_dump(skb, data, len);
3208 break;
3209 case NFT_DATA_VERDICT:
3210 err = nft_verdict_dump(skb, data);
3211 break;
3212 default:
3213 err = -EINVAL;
3214 WARN_ON(1);
3215 }
3216
3217 nla_nest_end(skb, nest);
3218 return err;
3219}
3220EXPORT_SYMBOL_GPL(nft_data_dump);
3221
3222static int nf_tables_init_net(struct net *net)
3223{
3224 INIT_LIST_HEAD(&net->nft.af_info);
3225 INIT_LIST_HEAD(&net->nft.commit_list);
3226 return 0;
3227}
3228
3229static struct pernet_operations nf_tables_net_ops = {
3230 .init = nf_tables_init_net,
3231};
3232
3233static int __init nf_tables_module_init(void)
3234{
3235 int err;
3236
3237 info = kmalloc(sizeof(struct nft_expr_info) * NFT_RULE_MAXEXPRS,
3238 GFP_KERNEL);
3239 if (info == NULL) {
3240 err = -ENOMEM;
3241 goto err1;
3242 }
3243
3244 err = nf_tables_core_module_init();
3245 if (err < 0)
3246 goto err2;
3247
3248 err = nfnetlink_subsys_register(&nf_tables_subsys);
3249 if (err < 0)
3250 goto err3;
3251
3252 pr_info("nf_tables: (c) 2007-2009 Patrick McHardy <kaber@trash.net>\n");
3253 return register_pernet_subsys(&nf_tables_net_ops);
3254err3:
3255 nf_tables_core_module_exit();
3256err2:
3257 kfree(info);
3258err1:
3259 return err;
3260}
3261
3262static void __exit nf_tables_module_exit(void)
3263{
3264 unregister_pernet_subsys(&nf_tables_net_ops);
3265 nfnetlink_subsys_unregister(&nf_tables_subsys);
3266 nf_tables_core_module_exit();
3267 kfree(info);
3268}
3269
3270module_init(nf_tables_module_init);
3271module_exit(nf_tables_module_exit);
3272
3273MODULE_LICENSE("GPL");
3274MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
3275MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
new file mode 100644
index 000000000000..cb9e685caae1
--- /dev/null
+++ b/net/netfilter/nf_tables_core.c
@@ -0,0 +1,270 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/list.h>
14#include <linux/rculist.h>
15#include <linux/skbuff.h>
16#include <linux/netlink.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter/nfnetlink.h>
19#include <linux/netfilter/nf_tables.h>
20#include <net/netfilter/nf_tables_core.h>
21#include <net/netfilter/nf_tables.h>
22#include <net/netfilter/nf_log.h>
23
24static void nft_cmp_fast_eval(const struct nft_expr *expr,
25 struct nft_data data[NFT_REG_MAX + 1])
26{
27 const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
28 u32 mask;
29
30 mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len);
31 if ((data[priv->sreg].data[0] & mask) == priv->data)
32 return;
33 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
34}
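/*
 * The length here is stored in bits by nft_cmp_fast_init() (see nft_cmp.c
 * below), so the mask keeps exactly priv->len low-order bits of the 32-bit
 * register word. A standalone userspace sketch of the same computation,
 * for illustration only:
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int lens[] = { 8, 16, 24, 32 };	/* lengths in bits */
	unsigned int i;

	for (i = 0; i < 4; i++) {
		/* mirrors ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - priv->len) */
		uint32_t mask = ~0U >> (32 - lens[i]);

		printf("len=%2u bits -> mask=0x%08x\n", lens[i], mask);
	}
	return 0;	/* prints 0x000000ff, 0x0000ffff, 0x00ffffff, 0xffffffff */
}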
35
36static bool nft_payload_fast_eval(const struct nft_expr *expr,
37 struct nft_data data[NFT_REG_MAX + 1],
38 const struct nft_pktinfo *pkt)
39{
40 const struct nft_payload *priv = nft_expr_priv(expr);
41 const struct sk_buff *skb = pkt->skb;
42 struct nft_data *dest = &data[priv->dreg];
43 unsigned char *ptr;
44
45 if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
46 ptr = skb_network_header(skb);
47 else
48 ptr = skb_network_header(skb) + pkt->xt.thoff;
49
50 ptr += priv->offset;
51
52 if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
53 return false;
54
55 if (priv->len == 2)
56 *(u16 *)dest->data = *(u16 *)ptr;
57 else if (priv->len == 4)
58 *(u32 *)dest->data = *(u32 *)ptr;
59 else
60 *(u8 *)dest->data = *(u8 *)ptr;
61 return true;
62}
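/*
 * The fast path copies two or four bytes (a single byte otherwise)
 * relative to the network or transport header and returns false if the
 * access would run past the end of the linear data; in that case
 * nft_do_chain_pktinfo() below falls back to the payload expression's
 * regular ->eval().
 */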
63
64struct nft_jumpstack {
65 const struct nft_chain *chain;
66 const struct nft_rule *rule;
67 int rulenum;
68};
69
70static inline void
71nft_chain_stats(const struct nft_chain *this, const struct nft_pktinfo *pkt,
72 struct nft_jumpstack *jumpstack, unsigned int stackptr)
73{
74 struct nft_stats __percpu *stats;
75 const struct nft_chain *chain = stackptr ? jumpstack[0].chain : this;
76
77 rcu_read_lock_bh();
78 stats = rcu_dereference(nft_base_chain(chain)->stats);
79 __this_cpu_inc(stats->pkts);
80 __this_cpu_add(stats->bytes, pkt->skb->len);
81 rcu_read_unlock_bh();
82}
83
84enum nft_trace {
85 NFT_TRACE_RULE,
86 NFT_TRACE_RETURN,
87 NFT_TRACE_POLICY,
88};
89
90static const char *const comments[] = {
91 [NFT_TRACE_RULE] = "rule",
92 [NFT_TRACE_RETURN] = "return",
93 [NFT_TRACE_POLICY] = "policy",
94};
95
96static struct nf_loginfo trace_loginfo = {
97 .type = NF_LOG_TYPE_LOG,
98 .u = {
99 .log = {
100 .level = 4,
101 .logflags = NF_LOG_MASK,
102 },
103 },
104};
105
106static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
107 const struct nft_chain *chain,
108 int rulenum, enum nft_trace type)
109{
110 struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
111
112 nf_log_packet(net, pkt->xt.family, pkt->hooknum, pkt->skb, pkt->in,
113 pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
114 chain->table->name, chain->name, comments[type],
115 rulenum);
116}
117
118unsigned int
119nft_do_chain_pktinfo(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
120{
121 const struct nft_chain *chain = ops->priv;
122 const struct nft_rule *rule;
123 const struct nft_expr *expr, *last;
124 struct nft_data data[NFT_REG_MAX + 1];
125 unsigned int stackptr = 0;
126 struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
127 int rulenum = 0;
128 /*
129 * Cache the generation cursor to avoid problems in case it is updated
130 * while traversing the ruleset.
131 */
132 unsigned int gencursor = ACCESS_ONCE(chain->net->nft.gencursor);
133
134do_chain:
135 rule = list_entry(&chain->rules, struct nft_rule, list);
136next_rule:
137 data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
138 list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
139
140 /* This rule is not active, skip. */
141 if (unlikely(rule->genmask & (1 << gencursor)))
142 continue;
143
144 rulenum++;
145
146 nft_rule_for_each_expr(expr, last, rule) {
147 if (expr->ops == &nft_cmp_fast_ops)
148 nft_cmp_fast_eval(expr, data);
149 else if (expr->ops != &nft_payload_fast_ops ||
150 !nft_payload_fast_eval(expr, data, pkt))
151 expr->ops->eval(expr, data, pkt);
152
153 if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE)
154 break;
155 }
156
157 switch (data[NFT_REG_VERDICT].verdict) {
158 case NFT_BREAK:
159 data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
160 /* fall through */
161 case NFT_CONTINUE:
162 continue;
163 }
164 break;
165 }
166
167 switch (data[NFT_REG_VERDICT].verdict) {
168 case NF_ACCEPT:
169 case NF_DROP:
170 case NF_QUEUE:
171 if (unlikely(pkt->skb->nf_trace))
172 nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
173
174 return data[NFT_REG_VERDICT].verdict;
175 case NFT_JUMP:
176 if (unlikely(pkt->skb->nf_trace))
177 nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
178
179 BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
180 jumpstack[stackptr].chain = chain;
181 jumpstack[stackptr].rule = rule;
182 jumpstack[stackptr].rulenum = rulenum;
183 stackptr++;
184 /* fall through */
185 case NFT_GOTO:
186 chain = data[NFT_REG_VERDICT].chain;
187 goto do_chain;
188 case NFT_RETURN:
189 if (unlikely(pkt->skb->nf_trace))
190 nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
191
192 /* fall through */
193 case NFT_CONTINUE:
194 break;
195 default:
196 WARN_ON(1);
197 }
198
199 if (stackptr > 0) {
200 if (unlikely(pkt->skb->nf_trace))
201 nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_RETURN);
202
203 stackptr--;
204 chain = jumpstack[stackptr].chain;
205 rule = jumpstack[stackptr].rule;
206 rulenum = jumpstack[stackptr].rulenum;
207 goto next_rule;
208 }
209 nft_chain_stats(chain, pkt, jumpstack, stackptr);
210
211 if (unlikely(pkt->skb->nf_trace))
212 nft_trace_packet(pkt, chain, ++rulenum, NFT_TRACE_POLICY);
213
214 return nft_base_chain(chain)->policy;
215}
216EXPORT_SYMBOL_GPL(nft_do_chain_pktinfo);
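/*
 * A rough sketch of how the per-family hooks added elsewhere in this
 * series (e.g. net/ipv4/netfilter/nf_tables_ipv4.c) feed packets into this
 * function; the wrapper name is illustrative:
 *
 *	static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
 *					      struct sk_buff *skb,
 *					      const struct net_device *in,
 *					      const struct net_device *out,
 *					      int (*okfn)(struct sk_buff *))
 *	{
 *		struct nft_pktinfo pkt;
 *
 *		nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
 *		return nft_do_chain_pktinfo(&pkt, ops);
 *	}
 *
 * ops->priv carries the base chain that was registered when the chain was
 * created, which is why the first line of this function can recover it
 * directly.
 */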
217
218int __init nf_tables_core_module_init(void)
219{
220 int err;
221
222 err = nft_immediate_module_init();
223 if (err < 0)
224 goto err1;
225
226 err = nft_cmp_module_init();
227 if (err < 0)
228 goto err2;
229
230 err = nft_lookup_module_init();
231 if (err < 0)
232 goto err3;
233
234 err = nft_bitwise_module_init();
235 if (err < 0)
236 goto err4;
237
238 err = nft_byteorder_module_init();
239 if (err < 0)
240 goto err5;
241
242 err = nft_payload_module_init();
243 if (err < 0)
244 goto err6;
245
246 return 0;
247
248err6:
249 nft_byteorder_module_exit();
250err5:
251 nft_bitwise_module_exit();
252err4:
253 nft_lookup_module_exit();
254err3:
255 nft_cmp_module_exit();
256err2:
257 nft_immediate_module_exit();
258err1:
259 return err;
260}
261
262void nf_tables_core_module_exit(void)
263{
264 nft_payload_module_exit();
265 nft_byteorder_module_exit();
266 nft_bitwise_module_exit();
267 nft_lookup_module_exit();
268 nft_cmp_module_exit();
269 nft_immediate_module_exit();
270}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 572d87dc116f..027f16af51a0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -147,9 +147,6 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
147 147 const struct nfnetlink_subsystem *ss;
148 148 int type, err;
149 149
150 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
151 return -EPERM;
152
153 150 /* All the messages must at least contain nfgenmsg */
154 151 if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))
155 152 return 0;
@@ -217,9 +214,179 @@ replay:
217 214 }
218 215 }
219 216
217static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
218 u_int16_t subsys_id)
219{
220 struct sk_buff *nskb, *oskb = skb;
221 struct net *net = sock_net(skb->sk);
222 const struct nfnetlink_subsystem *ss;
223 const struct nfnl_callback *nc;
224 bool success = true, done = false;
225 int err;
226
227 if (subsys_id >= NFNL_SUBSYS_COUNT)
228 return netlink_ack(skb, nlh, -EINVAL);
229replay:
230 nskb = netlink_skb_clone(oskb, GFP_KERNEL);
231 if (!nskb)
232 return netlink_ack(oskb, nlh, -ENOMEM);
233
234 nskb->sk = oskb->sk;
235 skb = nskb;
236
237 nfnl_lock(subsys_id);
238 ss = rcu_dereference_protected(table[subsys_id].subsys,
239 lockdep_is_held(&table[subsys_id].mutex));
240 if (!ss) {
241#ifdef CONFIG_MODULES
242 nfnl_unlock(subsys_id);
243 request_module("nfnetlink-subsys-%d", subsys_id);
244 nfnl_lock(subsys_id);
245 ss = rcu_dereference_protected(table[subsys_id].subsys,
246 lockdep_is_held(&table[subsys_id].mutex));
247 if (!ss)
248#endif
249 {
250 nfnl_unlock(subsys_id);
251 kfree_skb(nskb);
252 return netlink_ack(skb, nlh, -EOPNOTSUPP);
253 }
254 }
255
256 if (!ss->commit || !ss->abort) {
257 nfnl_unlock(subsys_id);
258 kfree_skb(nskb);
259 return netlink_ack(skb, nlh, -EOPNOTSUPP);
260 }
261
262 while (skb->len >= nlmsg_total_size(0)) {
263 int msglen, type;
264
265 nlh = nlmsg_hdr(skb);
266 err = 0;
267
268 if (nlh->nlmsg_len < NLMSG_HDRLEN) {
269 err = -EINVAL;
270 goto ack;
271 }
272
273 /* Only requests are handled by the kernel */
274 if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) {
275 err = -EINVAL;
276 goto ack;
277 }
278
279 type = nlh->nlmsg_type;
280 if (type == NFNL_MSG_BATCH_BEGIN) {
281 /* Malformed: Batch begin twice */
282 success = false;
283 goto done;
284 } else if (type == NFNL_MSG_BATCH_END) {
285 done = true;
286 goto done;
287 } else if (type < NLMSG_MIN_TYPE) {
288 err = -EINVAL;
289 goto ack;
290 }
291
292 /* We only accept a batch with messages for the same
293 * subsystem.
294 */
295 if (NFNL_SUBSYS_ID(type) != subsys_id) {
296 err = -EINVAL;
297 goto ack;
298 }
299
300 nc = nfnetlink_find_client(type, ss);
301 if (!nc) {
302 err = -EINVAL;
303 goto ack;
304 }
305
306 {
307 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
308 u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
309 struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
310 struct nlattr *attr = (void *)nlh + min_len;
311 int attrlen = nlh->nlmsg_len - min_len;
312
313 err = nla_parse(cda, ss->cb[cb_id].attr_count,
314 attr, attrlen, ss->cb[cb_id].policy);
315 if (err < 0)
316 goto ack;
317
318 if (nc->call_batch) {
319 err = nc->call_batch(net->nfnl, skb, nlh,
320 (const struct nlattr **)cda);
321 }
322
323 /* The lock was released to autoload some module, so we
324 * have to abort and start from scratch using the
325 * original skb.
326 */
327 if (err == -EAGAIN) {
328 ss->abort(skb);
329 nfnl_unlock(subsys_id);
330 kfree_skb(nskb);
331 goto replay;
332 }
333 }
334ack:
335 if (nlh->nlmsg_flags & NLM_F_ACK || err) {
336 /* We don't stop processing the batch on errors, so
337 * userspace gets all the errors that the batch
338 * triggers.
339 */
340 netlink_ack(skb, nlh, err);
341 if (err)
342 success = false;
343 }
344
345 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
346 if (msglen > skb->len)
347 msglen = skb->len;
348 skb_pull(skb, msglen);
349 }
350done:
351 if (success && done)
352 ss->commit(skb);
353 else
354 ss->abort(skb);
355
356 nfnl_unlock(subsys_id);
357 kfree_skb(nskb);
358}
359
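/*
 * Shape of a batch as consumed above (sketch): a single netlink datagram
 * whose first message is NFNL_MSG_BATCH_BEGIN carrying the target
 * subsystem id in nfgenmsg->res_id, followed by requests for that
 * subsystem only, and terminated by NFNL_MSG_BATCH_END, e.g.:
 *
 *	NFNL_MSG_BATCH_BEGIN	(res_id = NFNL_SUBSYS_NFTABLES)
 *	NFT_MSG_NEWRULE		...
 *	NFT_MSG_DELRULE		...
 *	NFNL_MSG_BATCH_END
 *
 * Only when every message succeeded and BATCH_END was seen does
 * ss->commit() run; any failure (or a missing BATCH_END) makes the whole
 * batch go through ss->abort() instead.
 */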
220 360 static void nfnetlink_rcv(struct sk_buff *skb)
221 361 {
222 netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
362 struct nlmsghdr *nlh = nlmsg_hdr(skb);
363 struct net *net = sock_net(skb->sk);
364 int msglen;
365
366 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
367 return netlink_ack(skb, nlh, -EPERM);
368
369 if (nlh->nlmsg_len < NLMSG_HDRLEN ||
370 skb->len < nlh->nlmsg_len)
371 return;
372
373 if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) {
374 struct nfgenmsg *nfgenmsg;
375
376 msglen = NLMSG_ALIGN(nlh->nlmsg_len);
377 if (msglen > skb->len)
378 msglen = skb->len;
379
380 if (nlh->nlmsg_len < NLMSG_HDRLEN ||
381 skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg))
382 return;
383
384 nfgenmsg = nlmsg_data(nlh);
385 skb_pull(skb, msglen);
386 nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
387 } else {
388 netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
389 }
223 390 }
224 391
225 392 #ifdef CONFIG_MODULES
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
new file mode 100644
index 000000000000..4fb6ee2c1106
--- /dev/null
+++ b/net/netfilter/nft_bitwise.c
@@ -0,0 +1,146 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables_core.h>
18#include <net/netfilter/nf_tables.h>
19
20struct nft_bitwise {
21 enum nft_registers sreg:8;
22 enum nft_registers dreg:8;
23 u8 len;
24 struct nft_data mask;
25 struct nft_data xor;
26};
27
28static void nft_bitwise_eval(const struct nft_expr *expr,
29 struct nft_data data[NFT_REG_MAX + 1],
30 const struct nft_pktinfo *pkt)
31{
32 const struct nft_bitwise *priv = nft_expr_priv(expr);
33 const struct nft_data *src = &data[priv->sreg];
34 struct nft_data *dst = &data[priv->dreg];
35 unsigned int i;
36
37 for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) {
38 dst->data[i] = (src->data[i] & priv->mask.data[i]) ^
39 priv->xor.data[i];
40 }
41}
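/*
 * priv->len is in bytes and the loop above works on 32-bit words, so a
 * mask/xor pair describes "keep these bits, then flip those". A standalone
 * userspace sketch of the transform on a single word, values illustrative:
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t src  = 0x12345678;
	uint32_t mask = 0x0000ffff;	/* keep only the low 16 bits */
	uint32_t xor  = 0x00000000;	/* all-zero xor leaves them untouched */
	uint32_t dst  = (src & mask) ^ xor;

	printf("(0x%08x & 0x%08x) ^ 0x%08x = 0x%08x\n", src, mask, xor, dst);
	return 0;	/* prints ... = 0x00005678 */
}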
42
43static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
44 [NFTA_BITWISE_SREG] = { .type = NLA_U32 },
45 [NFTA_BITWISE_DREG] = { .type = NLA_U32 },
46 [NFTA_BITWISE_LEN] = { .type = NLA_U32 },
47 [NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
48 [NFTA_BITWISE_XOR] = { .type = NLA_NESTED },
49};
50
51static int nft_bitwise_init(const struct nft_ctx *ctx,
52 const struct nft_expr *expr,
53 const struct nlattr * const tb[])
54{
55 struct nft_bitwise *priv = nft_expr_priv(expr);
56 struct nft_data_desc d1, d2;
57 int err;
58
59 if (tb[NFTA_BITWISE_SREG] == NULL ||
60 tb[NFTA_BITWISE_DREG] == NULL ||
61 tb[NFTA_BITWISE_LEN] == NULL ||
62 tb[NFTA_BITWISE_MASK] == NULL ||
63 tb[NFTA_BITWISE_XOR] == NULL)
64 return -EINVAL;
65
66 priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG]));
67 err = nft_validate_input_register(priv->sreg);
68 if (err < 0)
69 return err;
70
71 priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG]));
72 err = nft_validate_output_register(priv->dreg);
73 if (err < 0)
74 return err;
75 err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
76 if (err < 0)
77 return err;
78
79 priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
80
81 err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]);
82 if (err < 0)
83 return err;
84 if (d1.len != priv->len)
85 return -EINVAL;
86
87 err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]);
88 if (err < 0)
89 return err;
90 if (d2.len != priv->len)
91 return -EINVAL;
92
93 return 0;
94}
95
96static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr)
97{
98 const struct nft_bitwise *priv = nft_expr_priv(expr);
99
100 if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg)))
101 goto nla_put_failure;
102 if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg)))
103 goto nla_put_failure;
104 if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len)))
105 goto nla_put_failure;
106
107 if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
108 NFT_DATA_VALUE, priv->len) < 0)
109 goto nla_put_failure;
110
111 if (nft_data_dump(skb, NFTA_BITWISE_XOR, &priv->xor,
112 NFT_DATA_VALUE, priv->len) < 0)
113 goto nla_put_failure;
114
115 return 0;
116
117nla_put_failure:
118 return -1;
119}
120
121static struct nft_expr_type nft_bitwise_type;
122static const struct nft_expr_ops nft_bitwise_ops = {
123 .type = &nft_bitwise_type,
124 .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)),
125 .eval = nft_bitwise_eval,
126 .init = nft_bitwise_init,
127 .dump = nft_bitwise_dump,
128};
129
130static struct nft_expr_type nft_bitwise_type __read_mostly = {
131 .name = "bitwise",
132 .ops = &nft_bitwise_ops,
133 .policy = nft_bitwise_policy,
134 .maxattr = NFTA_BITWISE_MAX,
135 .owner = THIS_MODULE,
136};
137
138int __init nft_bitwise_module_init(void)
139{
140 return nft_register_expr(&nft_bitwise_type);
141}
142
143void nft_bitwise_module_exit(void)
144{
145 nft_unregister_expr(&nft_bitwise_type);
146}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
new file mode 100644
index 000000000000..c39ed8d29df1
--- /dev/null
+++ b/net/netfilter/nft_byteorder.c
@@ -0,0 +1,173 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables_core.h>
18#include <net/netfilter/nf_tables.h>
19
20struct nft_byteorder {
21 enum nft_registers sreg:8;
22 enum nft_registers dreg:8;
23 enum nft_byteorder_ops op:8;
24 u8 len;
25 u8 size;
26};
27
28static void nft_byteorder_eval(const struct nft_expr *expr,
29 struct nft_data data[NFT_REG_MAX + 1],
30 const struct nft_pktinfo *pkt)
31{
32 const struct nft_byteorder *priv = nft_expr_priv(expr);
33 struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg];
34 union { u32 u32; u16 u16; } *s, *d;
35 unsigned int i;
36
37 s = (void *)src->data;
38 d = (void *)dst->data;
39
40 switch (priv->size) {
41 case 4:
42 switch (priv->op) {
43 case NFT_BYTEORDER_NTOH:
44 for (i = 0; i < priv->len / 4; i++)
45 d[i].u32 = ntohl((__force __be32)s[i].u32);
46 break;
47 case NFT_BYTEORDER_HTON:
48 for (i = 0; i < priv->len / 4; i++)
49 d[i].u32 = (__force __u32)htonl(s[i].u32);
50 break;
51 }
52 break;
53 case 2:
54 switch (priv->op) {
55 case NFT_BYTEORDER_NTOH:
56 for (i = 0; i < priv->len / 2; i++)
57 d[i].u16 = ntohs((__force __be16)s[i].u16);
58 break;
59 case NFT_BYTEORDER_HTON:
60 for (i = 0; i < priv->len / 2; i++)
61 d[i].u16 = (__force __u16)htons(s[i].u16);
62 break;
63 }
64 break;
65 }
66}
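/*
 * A standalone userspace sketch of the NFT_BYTEORDER_NTOH case with
 * size 2: the eval loop above does the same conversion for priv->len / 2
 * consecutive 16-bit values held in the source register.
 */
#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint16_t be[2] = { htons(80), htons(443) };	/* network byte order */
	uint16_t host[2];
	unsigned int i;

	for (i = 0; i < 2; i++)
		host[i] = ntohs(be[i]);

	printf("%u %u\n", host[0], host[1]);	/* prints "80 443" */
	return 0;
}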
67
68static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
69 [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 },
70 [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 },
71 [NFTA_BYTEORDER_OP] = { .type = NLA_U32 },
72 [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 },
73 [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 },
74};
75
76static int nft_byteorder_init(const struct nft_ctx *ctx,
77 const struct nft_expr *expr,
78 const struct nlattr * const tb[])
79{
80 struct nft_byteorder *priv = nft_expr_priv(expr);
81 int err;
82
83 if (tb[NFTA_BYTEORDER_SREG] == NULL ||
84 tb[NFTA_BYTEORDER_DREG] == NULL ||
85 tb[NFTA_BYTEORDER_LEN] == NULL ||
86 tb[NFTA_BYTEORDER_SIZE] == NULL ||
87 tb[NFTA_BYTEORDER_OP] == NULL)
88 return -EINVAL;
89
90 priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG]));
91 err = nft_validate_input_register(priv->sreg);
92 if (err < 0)
93 return err;
94
95 priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG]));
96 err = nft_validate_output_register(priv->dreg);
97 if (err < 0)
98 return err;
99 err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
100 if (err < 0)
101 return err;
102
103 priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP]));
104 switch (priv->op) {
105 case NFT_BYTEORDER_NTOH:
106 case NFT_BYTEORDER_HTON:
107 break;
108 default:
109 return -EINVAL;
110 }
111
112 priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
113 if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data))
114 return -EINVAL;
115
116 priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
117 switch (priv->size) {
118 case 2:
119 case 4:
120 break;
121 default:
122 return -EINVAL;
123 }
124
125 return 0;
126}
127
128static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
129{
130 const struct nft_byteorder *priv = nft_expr_priv(expr);
131
132 if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg)))
133 goto nla_put_failure;
134 if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg)))
135 goto nla_put_failure;
136 if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op)))
137 goto nla_put_failure;
138 if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len)))
139 goto nla_put_failure;
140 if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size)))
141 goto nla_put_failure;
142 return 0;
143
144nla_put_failure:
145 return -1;
146}
147
148static struct nft_expr_type nft_byteorder_type;
149static const struct nft_expr_ops nft_byteorder_ops = {
150 .type = &nft_byteorder_type,
151 .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)),
152 .eval = nft_byteorder_eval,
153 .init = nft_byteorder_init,
154 .dump = nft_byteorder_dump,
155};
156
157static struct nft_expr_type nft_byteorder_type __read_mostly = {
158 .name = "byteorder",
159 .ops = &nft_byteorder_ops,
160 .policy = nft_byteorder_policy,
161 .maxattr = NFTA_BYTEORDER_MAX,
162 .owner = THIS_MODULE,
163};
164
165int __init nft_byteorder_module_init(void)
166{
167 return nft_register_expr(&nft_byteorder_type);
168}
169
170void nft_byteorder_module_exit(void)
171{
172 nft_unregister_expr(&nft_byteorder_type);
173}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
new file mode 100644
index 000000000000..954925db414d
--- /dev/null
+++ b/net/netfilter/nft_cmp.c
@@ -0,0 +1,223 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables_core.h>
18#include <net/netfilter/nf_tables.h>
19
20struct nft_cmp_expr {
21 struct nft_data data;
22 enum nft_registers sreg:8;
23 u8 len;
24 enum nft_cmp_ops op:8;
25};
26
27static void nft_cmp_eval(const struct nft_expr *expr,
28 struct nft_data data[NFT_REG_MAX + 1],
29 const struct nft_pktinfo *pkt)
30{
31 const struct nft_cmp_expr *priv = nft_expr_priv(expr);
32 int d;
33
34 d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len);
35 switch (priv->op) {
36 case NFT_CMP_EQ:
37 if (d != 0)
38 goto mismatch;
39 break;
40 case NFT_CMP_NEQ:
41 if (d == 0)
42 goto mismatch;
43 break;
44 case NFT_CMP_LT:
45 if (d == 0)
46 goto mismatch;
47 case NFT_CMP_LTE:
48 if (d > 0)
49 goto mismatch;
50 break;
51 case NFT_CMP_GT:
52 if (d == 0)
53 goto mismatch;
54 case NFT_CMP_GTE:
55 if (d < 0)
56 goto mismatch;
57 break;
58 }
59 return;
60
61mismatch:
62 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
63}
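/*
 * The missing breaks after the NFT_CMP_LT and NFT_CMP_GT cases are
 * deliberate: LT first rejects equality and then falls through to the LTE
 * check, GT does the same with GTE, so the expression matches exactly when
 * d < 0 respectively d > 0. A userspace sketch of the resulting semantics,
 * with memcmp() standing in for nft_data_cmp():
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char a = 0x0a, b = 0x14;
	int d = memcmp(&a, &b, 1);		/* 0x0a < 0x14 -> d < 0 */

	printf("LT matches: %d\n", d < 0);	/* 1 */
	printf("LTE matches: %d\n", d <= 0);	/* 1 */
	printf("GT matches: %d\n", d > 0);	/* 0 */
	printf("GTE matches: %d\n", d >= 0);	/* 0 */
	return 0;
}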
64
65static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = {
66 [NFTA_CMP_SREG] = { .type = NLA_U32 },
67 [NFTA_CMP_OP] = { .type = NLA_U32 },
68 [NFTA_CMP_DATA] = { .type = NLA_NESTED },
69};
70
71static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
72 const struct nlattr * const tb[])
73{
74 struct nft_cmp_expr *priv = nft_expr_priv(expr);
75 struct nft_data_desc desc;
76 int err;
77
78 priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
79 priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
80
81 err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]);
82 BUG_ON(err < 0);
83
84 priv->len = desc.len;
85 return 0;
86}
87
88static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr)
89{
90 const struct nft_cmp_expr *priv = nft_expr_priv(expr);
91
92 if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
93 goto nla_put_failure;
94 if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op)))
95 goto nla_put_failure;
96
97 if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data,
98 NFT_DATA_VALUE, priv->len) < 0)
99 goto nla_put_failure;
100 return 0;
101
102nla_put_failure:
103 return -1;
104}
105
106static struct nft_expr_type nft_cmp_type;
107static const struct nft_expr_ops nft_cmp_ops = {
108 .type = &nft_cmp_type,
109 .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)),
110 .eval = nft_cmp_eval,
111 .init = nft_cmp_init,
112 .dump = nft_cmp_dump,
113};
114
115static int nft_cmp_fast_init(const struct nft_ctx *ctx,
116 const struct nft_expr *expr,
117 const struct nlattr * const tb[])
118{
119 struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
120 struct nft_data_desc desc;
121 struct nft_data data;
122 u32 mask;
123 int err;
124
125 priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
126
127 err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
128 BUG_ON(err < 0);
129 desc.len *= BITS_PER_BYTE;
130
131 mask = ~0U >> (sizeof(priv->data) * BITS_PER_BYTE - desc.len);
132 priv->data = data.data[0] & mask;
133 priv->len = desc.len;
134 return 0;
135}
136
137static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr)
138{
139 const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
140 struct nft_data data;
141
142 if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg)))
143 goto nla_put_failure;
144 if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ)))
145 goto nla_put_failure;
146
147 data.data[0] = priv->data;
148 if (nft_data_dump(skb, NFTA_CMP_DATA, &data,
149 NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0)
150 goto nla_put_failure;
151 return 0;
152
153nla_put_failure:
154 return -1;
155}
156
157const struct nft_expr_ops nft_cmp_fast_ops = {
158 .type = &nft_cmp_type,
159 .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)),
160 .eval = NULL, /* inlined */
161 .init = nft_cmp_fast_init,
162 .dump = nft_cmp_fast_dump,
163};
164
165static const struct nft_expr_ops *
166nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
167{
168 struct nft_data_desc desc;
169 struct nft_data data;
170 enum nft_registers sreg;
171 enum nft_cmp_ops op;
172 int err;
173
174 if (tb[NFTA_CMP_SREG] == NULL ||
175 tb[NFTA_CMP_OP] == NULL ||
176 tb[NFTA_CMP_DATA] == NULL)
177 return ERR_PTR(-EINVAL);
178
179 sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG]));
180 err = nft_validate_input_register(sreg);
181 if (err < 0)
182 return ERR_PTR(err);
183
184 op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
185 switch (op) {
186 case NFT_CMP_EQ:
187 case NFT_CMP_NEQ:
188 case NFT_CMP_LT:
189 case NFT_CMP_LTE:
190 case NFT_CMP_GT:
191 case NFT_CMP_GTE:
192 break;
193 default:
194 return ERR_PTR(-EINVAL);
195 }
196
197 err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]);
198 if (err < 0)
199 return ERR_PTR(err);
200
201 if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ)
202 return &nft_cmp_fast_ops;
203 else
204 return &nft_cmp_ops;
205}
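/*
 * The fast variant is only picked for equality tests whose data fits into
 * a single 32-bit word; its ->eval is NULL because nft_do_chain_pktinfo()
 * in nf_tables_core.c recognises nft_cmp_fast_ops and calls
 * nft_cmp_fast_eval() directly instead of going through the ops->eval()
 * indirection.
 */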
206
207static struct nft_expr_type nft_cmp_type __read_mostly = {
208 .name = "cmp",
209 .select_ops = nft_cmp_select_ops,
210 .policy = nft_cmp_policy,
211 .maxattr = NFTA_CMP_MAX,
212 .owner = THIS_MODULE,
213};
214
215int __init nft_cmp_module_init(void)
216{
217 return nft_register_expr(&nft_cmp_type);
218}
219
220void nft_cmp_module_exit(void)
221{
222 nft_unregister_expr(&nft_cmp_type);
223}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
new file mode 100644
index 000000000000..4811f762e060
--- /dev/null
+++ b/net/netfilter/nft_compat.c
@@ -0,0 +1,768 @@
1/*
2 * (C) 2012-2013 by Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * This software has been sponsored by Sophos Astaro <http://www.sophos.com>
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nfnetlink.h>
17#include <linux/netfilter/nf_tables.h>
18#include <linux/netfilter/nf_tables_compat.h>
19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter_ipv4/ip_tables.h>
21#include <linux/netfilter_ipv6/ip6_tables.h>
22#include <asm/uaccess.h> /* for set_fs */
23#include <net/netfilter/nf_tables.h>
24
25union nft_entry {
26 struct ipt_entry e4;
27 struct ip6t_entry e6;
28};
29
30static inline void
31nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info)
32{
33 par->target = xt;
34 par->targinfo = xt_info;
35 par->hotdrop = false;
36}
37
38static void nft_target_eval(const struct nft_expr *expr,
39 struct nft_data data[NFT_REG_MAX + 1],
40 const struct nft_pktinfo *pkt)
41{
42 void *info = nft_expr_priv(expr);
43 struct xt_target *target = expr->ops->data;
44 struct sk_buff *skb = pkt->skb;
45 int ret;
46
47 nft_compat_set_par((struct xt_action_param *)&pkt->xt, target, info);
48
49 ret = target->target(skb, &pkt->xt);
50
51 if (pkt->xt.hotdrop)
52 ret = NF_DROP;
53
54 switch(ret) {
55 case XT_CONTINUE:
56 data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
57 break;
58 default:
59 data[NFT_REG_VERDICT].verdict = ret;
60 break;
61 }
62 return;
63}
64
65static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
66 [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
67 [NFTA_TARGET_REV] = { .type = NLA_U32 },
68 [NFTA_TARGET_INFO] = { .type = NLA_BINARY },
69};
70
71static void
72nft_target_set_tgchk_param(struct xt_tgchk_param *par,
73 const struct nft_ctx *ctx,
74 struct xt_target *target, void *info,
75 union nft_entry *entry, u8 proto, bool inv)
76{
77 par->net = &init_net;
78 par->table = ctx->table->name;
79 switch (ctx->afi->family) {
80 case AF_INET:
81 entry->e4.ip.proto = proto;
82 entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
83 break;
84 case AF_INET6:
85 entry->e6.ipv6.proto = proto;
86 entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
87 break;
88 }
89 par->entryinfo = entry;
90 par->target = target;
91 par->targinfo = info;
92 if (ctx->chain->flags & NFT_BASE_CHAIN) {
93 const struct nft_base_chain *basechain =
94 nft_base_chain(ctx->chain);
95 const struct nf_hook_ops *ops = &basechain->ops;
96
97 par->hook_mask = 1 << ops->hooknum;
98 }
99 par->family = ctx->afi->family;
100}
101
102static void target_compat_from_user(struct xt_target *t, void *in, void *out)
103{
104#ifdef CONFIG_COMPAT
105 if (t->compat_from_user) {
106 int pad;
107
108 t->compat_from_user(out, in);
109 pad = XT_ALIGN(t->targetsize) - t->targetsize;
110 if (pad > 0)
111 memset(out + t->targetsize, 0, pad);
112 } else
113#endif
114 memcpy(out, in, XT_ALIGN(t->targetsize));
115}
116
117static inline int nft_compat_target_offset(struct xt_target *target)
118{
119#ifdef CONFIG_COMPAT
120 return xt_compat_target_offset(target);
121#else
122 return 0;
123#endif
124}
125
126static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = {
127 [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 },
128 [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 },
129};
130
131static u8 nft_parse_compat(const struct nlattr *attr, bool *inv)
132{
133 struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
134 u32 flags;
135 int err;
136
137 err = nla_parse_nested(tb, NFTA_RULE_COMPAT_MAX, attr,
138 nft_rule_compat_policy);
139 if (err < 0)
140 return err;
141
142 if (!tb[NFTA_RULE_COMPAT_PROTO] || !tb[NFTA_RULE_COMPAT_FLAGS])
143 return -EINVAL;
144
145 flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
146 if (flags & ~NFT_RULE_COMPAT_F_MASK)
147 return -EINVAL;
148 if (flags & NFT_RULE_COMPAT_F_INV)
149 *inv = true;
150
151 return ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
152}
153
154static int
155nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
156 const struct nlattr * const tb[])
157{
158 void *info = nft_expr_priv(expr);
159 struct xt_target *target = expr->ops->data;
160 struct xt_tgchk_param par;
161 size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
162 u8 proto = 0;
163 bool inv = false;
164 union nft_entry e = {};
165 int ret;
166
167 target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
168
169 if (ctx->nla[NFTA_RULE_COMPAT])
170 proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv);
171
172 nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
173
174 ret = xt_check_target(&par, size, proto, inv);
175 if (ret < 0)
176 goto err;
177
178 /* The standard target cannot be used */
179 if (target->target == NULL) {
180 ret = -EINVAL;
181 goto err;
182 }
183
184 return 0;
185err:
186 module_put(target->me);
187 return ret;
188}
189
190static void
191nft_target_destroy(const struct nft_expr *expr)
192{
193 struct xt_target *target = expr->ops->data;
194
195 module_put(target->me);
196}
197
198static int
199target_dump_info(struct sk_buff *skb, const struct xt_target *t, const void *in)
200{
201 int ret;
202
203#ifdef CONFIG_COMPAT
204 if (t->compat_to_user) {
205 mm_segment_t old_fs;
206 void *out;
207
208 out = kmalloc(XT_ALIGN(t->targetsize), GFP_ATOMIC);
209 if (out == NULL)
210 return -ENOMEM;
211
212 /* We want to reuse existing compat_to_user */
213 old_fs = get_fs();
214 set_fs(KERNEL_DS);
215 t->compat_to_user(out, in);
216 set_fs(old_fs);
217 ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), out);
218 kfree(out);
219 } else
220#endif
221 ret = nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(t->targetsize), in);
222
223 return ret;
224}
225
226static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
227{
228 const struct xt_target *target = expr->ops->data;
229 void *info = nft_expr_priv(expr);
230
231 if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) ||
232 nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) ||
233 target_dump_info(skb, target, info))
234 goto nla_put_failure;
235
236 return 0;
237
238nla_put_failure:
239 return -1;
240}
241
242static int nft_target_validate(const struct nft_ctx *ctx,
243 const struct nft_expr *expr,
244 const struct nft_data **data)
245{
246 struct xt_target *target = expr->ops->data;
247 unsigned int hook_mask = 0;
248
249 if (ctx->chain->flags & NFT_BASE_CHAIN) {
250 const struct nft_base_chain *basechain =
251 nft_base_chain(ctx->chain);
252 const struct nf_hook_ops *ops = &basechain->ops;
253
254 hook_mask = 1 << ops->hooknum;
255 if (hook_mask & target->hooks)
256 return 0;
257
258 /* This target is being called from an invalid chain */
259 return -EINVAL;
260 }
261 return 0;
262}
263
264static void nft_match_eval(const struct nft_expr *expr,
265 struct nft_data data[NFT_REG_MAX + 1],
266 const struct nft_pktinfo *pkt)
267{
268 void *info = nft_expr_priv(expr);
269 struct xt_match *match = expr->ops->data;
270 struct sk_buff *skb = pkt->skb;
271 bool ret;
272
273 nft_compat_set_par((struct xt_action_param *)&pkt->xt, match, info);
274
275 ret = match->match(skb, (struct xt_action_param *)&pkt->xt);
276
277 if (pkt->xt.hotdrop) {
278 data[NFT_REG_VERDICT].verdict = NF_DROP;
279 return;
280 }
281
282 switch(ret) {
283 case true:
284 data[NFT_REG_VERDICT].verdict = NFT_CONTINUE;
285 break;
286 case false:
287 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
288 break;
289 }
290}
291
292static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
293 [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
294 [NFTA_MATCH_REV] = { .type = NLA_U32 },
295 [NFTA_MATCH_INFO] = { .type = NLA_BINARY },
296};
297
298/* struct xt_mtchk_param and xt_tgchk_param look very similar */
299static void
300nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
301 struct xt_match *match, void *info,
302 union nft_entry *entry, u8 proto, bool inv)
303{
304 par->net = &init_net;
305 par->table = ctx->table->name;
306 switch (ctx->afi->family) {
307 case AF_INET:
308 entry->e4.ip.proto = proto;
309 entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
310 break;
311 case AF_INET6:
312 entry->e6.ipv6.proto = proto;
313 entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
314 break;
315 }
316 par->entryinfo = entry;
317 par->match = match;
318 par->matchinfo = info;
319 if (ctx->chain->flags & NFT_BASE_CHAIN) {
320 const struct nft_base_chain *basechain =
321 nft_base_chain(ctx->chain);
322 const struct nf_hook_ops *ops = &basechain->ops;
323
324 par->hook_mask = 1 << ops->hooknum;
325 }
326 par->family = ctx->afi->family;
327}
328
329static void match_compat_from_user(struct xt_match *m, void *in, void *out)
330{
331#ifdef CONFIG_COMPAT
332 if (m->compat_from_user) {
333 int pad;
334
335 m->compat_from_user(out, in);
336 pad = XT_ALIGN(m->matchsize) - m->matchsize;
337 if (pad > 0)
338 memset(out + m->matchsize, 0, pad);
339 } else
340#endif
341 memcpy(out, in, XT_ALIGN(m->matchsize));
342}
343
344static int
345nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
346 const struct nlattr * const tb[])
347{
348 void *info = nft_expr_priv(expr);
349 struct xt_match *match = expr->ops->data;
350 struct xt_mtchk_param par;
351 size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
352 u8 proto = 0;
353 bool inv = false;
354 union nft_entry e = {};
355 int ret;
356
357 match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
358
359 if (ctx->nla[NFTA_RULE_COMPAT])
360 proto = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &inv);
361
362 nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
363
364 ret = xt_check_match(&par, size, proto, inv);
365 if (ret < 0)
366 goto err;
367
368 return 0;
369err:
370 module_put(match->me);
371 return ret;
372}
373
374static void
375nft_match_destroy(const struct nft_expr *expr)
376{
377 struct xt_match *match = expr->ops->data;
378
379 module_put(match->me);
380}
381
382static int
383match_dump_info(struct sk_buff *skb, const struct xt_match *m, const void *in)
384{
385 int ret;
386
387#ifdef CONFIG_COMPAT
388 if (m->compat_to_user) {
389 mm_segment_t old_fs;
390 void *out;
391
392 out = kmalloc(XT_ALIGN(m->matchsize), GFP_ATOMIC);
393 if (out == NULL)
394 return -ENOMEM;
395
396 /* We want to reuse existing compat_to_user */
397 old_fs = get_fs();
398 set_fs(KERNEL_DS);
399 m->compat_to_user(out, in);
400 set_fs(old_fs);
401 ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), out);
402 kfree(out);
403 } else
404#endif
405 ret = nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(m->matchsize), in);
406
407 return ret;
408}
409
410static inline int nft_compat_match_offset(struct xt_match *match)
411{
412#ifdef CONFIG_COMPAT
413 return xt_compat_match_offset(match);
414#else
415 return 0;
416#endif
417}
418
419static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
420{
421 void *info = nft_expr_priv(expr);
422 struct xt_match *match = expr->ops->data;
423
424 if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
425 nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) ||
426 match_dump_info(skb, match, info))
427 goto nla_put_failure;
428
429 return 0;
430
431nla_put_failure:
432 return -1;
433}
434
435static int nft_match_validate(const struct nft_ctx *ctx,
436 const struct nft_expr *expr,
437 const struct nft_data **data)
438{
439 struct xt_match *match = expr->ops->data;
440 unsigned int hook_mask = 0;
441
442 if (ctx->chain->flags & NFT_BASE_CHAIN) {
443 const struct nft_base_chain *basechain =
444 nft_base_chain(ctx->chain);
445 const struct nf_hook_ops *ops = &basechain->ops;
446
447 hook_mask = 1 << ops->hooknum;
448 if (hook_mask & match->hooks)
449 return 0;
450
451 /* This match is being called from an invalid chain */
452 return -EINVAL;
453 }
454 return 0;
455}
456
457static int
458nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
459 int event, u16 family, const char *name,
460 int rev, int target)
461{
462 struct nlmsghdr *nlh;
463 struct nfgenmsg *nfmsg;
464 unsigned int flags = portid ? NLM_F_MULTI : 0;
465
466 event |= NFNL_SUBSYS_NFT_COMPAT << 8;
467 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
468 if (nlh == NULL)
469 goto nlmsg_failure;
470
471 nfmsg = nlmsg_data(nlh);
472 nfmsg->nfgen_family = family;
473 nfmsg->version = NFNETLINK_V0;
474 nfmsg->res_id = 0;
475
476 if (nla_put_string(skb, NFTA_COMPAT_NAME, name) ||
477 nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) ||
478 nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target)))
479 goto nla_put_failure;
480
481 nlmsg_end(skb, nlh);
482 return skb->len;
483
484nlmsg_failure:
485nla_put_failure:
486 nlmsg_cancel(skb, nlh);
487 return -1;
488}
489
490static int
491nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
492 const struct nlmsghdr *nlh, const struct nlattr * const tb[])
493{
494 int ret = 0, target;
495 struct nfgenmsg *nfmsg;
496 const char *fmt;
497 const char *name;
498 u32 rev;
499 struct sk_buff *skb2;
500
501 if (tb[NFTA_COMPAT_NAME] == NULL ||
502 tb[NFTA_COMPAT_REV] == NULL ||
503 tb[NFTA_COMPAT_TYPE] == NULL)
504 return -EINVAL;
505
506 name = nla_data(tb[NFTA_COMPAT_NAME]);
507 rev = ntohl(nla_get_be32(tb[NFTA_COMPAT_REV]));
508 target = ntohl(nla_get_be32(tb[NFTA_COMPAT_TYPE]));
509
510 nfmsg = nlmsg_data(nlh);
511
512 switch(nfmsg->nfgen_family) {
513 case AF_INET:
514 fmt = "ipt_%s";
515 break;
516 case AF_INET6:
517 fmt = "ip6t_%s";
518 break;
519 default:
520 pr_err("nft_compat: unsupported protocol %d\n",
521 nfmsg->nfgen_family);
522 return -EINVAL;
523 }
524
525 try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name,
526 rev, target, &ret),
527 fmt, name);
528
529 if (ret < 0)
530 return ret;
531
532 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
533 if (skb2 == NULL)
534 return -ENOMEM;
535
536 /* include the best revision for this extension in the message */
537 if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
538 nlh->nlmsg_seq,
539 NFNL_MSG_TYPE(nlh->nlmsg_type),
540 NFNL_MSG_COMPAT_GET,
541 nfmsg->nfgen_family,
542 name, ret, target) <= 0) {
543 kfree_skb(skb2);
544 return -ENOSPC;
545 }
546
547 ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
548 MSG_DONTWAIT);
549 if (ret > 0)
550 ret = 0;
551
552 return ret == -EAGAIN ? -ENOBUFS : ret;
553}
554
555static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
556 [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
557 .len = NFT_COMPAT_NAME_MAX-1 },
558 [NFTA_COMPAT_REV] = { .type = NLA_U32 },
559 [NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
560};
561
562static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
563 [NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get,
564 .attr_count = NFTA_COMPAT_MAX,
565 .policy = nfnl_compat_policy_get },
566};
567
568static const struct nfnetlink_subsystem nfnl_compat_subsys = {
569 .name = "nft-compat",
570 .subsys_id = NFNL_SUBSYS_NFT_COMPAT,
571 .cb_count = NFNL_MSG_COMPAT_MAX,
572 .cb = nfnl_nft_compat_cb,
573};
574
575static LIST_HEAD(nft_match_list);
576
577struct nft_xt {
578 struct list_head head;
579 struct nft_expr_ops ops;
580};
581
582static struct nft_expr_type nft_match_type;
583
584static const struct nft_expr_ops *
585nft_match_select_ops(const struct nft_ctx *ctx,
586 const struct nlattr * const tb[])
587{
588 struct nft_xt *nft_match;
589 struct xt_match *match;
590 char *mt_name;
591 __u32 rev, family;
592
593 if (tb[NFTA_MATCH_NAME] == NULL ||
594 tb[NFTA_MATCH_REV] == NULL ||
595 tb[NFTA_MATCH_INFO] == NULL)
596 return ERR_PTR(-EINVAL);
597
598 mt_name = nla_data(tb[NFTA_MATCH_NAME]);
599 rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
600 family = ctx->afi->family;
601
602 /* Re-use the existing match if it's already loaded. */
603 list_for_each_entry(nft_match, &nft_match_list, head) {
604 struct xt_match *match = nft_match->ops.data;
605
606 if (strcmp(match->name, mt_name) == 0 &&
607 match->revision == rev && match->family == family)
608 return &nft_match->ops;
609 }
610
611 match = xt_request_find_match(family, mt_name, rev);
612 if (IS_ERR(match))
613 return ERR_PTR(-ENOENT);
614
615 /* This is the first time we use this match, allocate operations */
616 nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
617 if (nft_match == NULL)
618 return ERR_PTR(-ENOMEM);
619
620 nft_match->ops.type = &nft_match_type;
621 nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize) +
622 nft_compat_match_offset(match));
623 nft_match->ops.eval = nft_match_eval;
624 nft_match->ops.init = nft_match_init;
625 nft_match->ops.destroy = nft_match_destroy;
626 nft_match->ops.dump = nft_match_dump;
627 nft_match->ops.validate = nft_match_validate;
628 nft_match->ops.data = match;
629
630 list_add(&nft_match->head, &nft_match_list);
631
632 return &nft_match->ops;
633}
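/*
 * nft_match_select_ops() above caches one struct nft_xt wrapper per
 * (name, revision, family) of the underlying xt match on nft_match_list:
 * the first rule using a given match allocates the nft_expr_ops, later
 * rules reuse it, and the wrappers are only released by
 * nft_match_release() when the module is unloaded.
 */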
634
635static void nft_match_release(void)
636{
637	struct nft_xt *nft_match, *tmp;
638
639	list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head)
640 kfree(nft_match);
641}
642
643static struct nft_expr_type nft_match_type __read_mostly = {
644 .name = "match",
645 .select_ops = nft_match_select_ops,
646 .policy = nft_match_policy,
647 .maxattr = NFTA_MATCH_MAX,
648 .owner = THIS_MODULE,
649};
650
651static LIST_HEAD(nft_target_list);
652
653static struct nft_expr_type nft_target_type;
654
655static const struct nft_expr_ops *
656nft_target_select_ops(const struct nft_ctx *ctx,
657 const struct nlattr * const tb[])
658{
659 struct nft_xt *nft_target;
660 struct xt_target *target;
661 char *tg_name;
662 __u32 rev, family;
663
664 if (tb[NFTA_TARGET_NAME] == NULL ||
665 tb[NFTA_TARGET_REV] == NULL ||
666 tb[NFTA_TARGET_INFO] == NULL)
667 return ERR_PTR(-EINVAL);
668
669 tg_name = nla_data(tb[NFTA_TARGET_NAME]);
670 rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
671 family = ctx->afi->family;
672
673 /* Re-use the existing target if it's already loaded. */
674	list_for_each_entry(nft_target, &nft_target_list, head) {
675 struct xt_target *target = nft_target->ops.data;
676
677 if (strcmp(target->name, tg_name) == 0 &&
678 target->revision == rev && target->family == family)
679 return &nft_target->ops;
680 }
681
682 target = xt_request_find_target(family, tg_name, rev);
683 if (IS_ERR(target))
684 return ERR_PTR(-ENOENT);
685
686 /* This is the first time we use this target, allocate operations */
687 nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
688 if (nft_target == NULL)
689 return ERR_PTR(-ENOMEM);
690
691 nft_target->ops.type = &nft_target_type;
692 nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize) +
693 nft_compat_target_offset(target));
694 nft_target->ops.eval = nft_target_eval;
695 nft_target->ops.init = nft_target_init;
696 nft_target->ops.destroy = nft_target_destroy;
697 nft_target->ops.dump = nft_target_dump;
698 nft_target->ops.validate = nft_target_validate;
699 nft_target->ops.data = target;
700
701 list_add(&nft_target->head, &nft_target_list);
702
703 return &nft_target->ops;
704}
705
706static void nft_target_release(void)
707{
708	struct nft_xt *nft_target, *tmp;
709
710	list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head)
711 kfree(nft_target);
712}
713
714static struct nft_expr_type nft_target_type __read_mostly = {
715 .name = "target",
716 .select_ops = nft_target_select_ops,
717 .policy = nft_target_policy,
718 .maxattr = NFTA_TARGET_MAX,
719 .owner = THIS_MODULE,
720};
721
722static int __init nft_compat_module_init(void)
723{
724 int ret;
725
726 ret = nft_register_expr(&nft_match_type);
727 if (ret < 0)
728 return ret;
729
730 ret = nft_register_expr(&nft_target_type);
731 if (ret < 0)
732 goto err_match;
733
734 ret = nfnetlink_subsys_register(&nfnl_compat_subsys);
735 if (ret < 0) {
736 pr_err("nft_compat: cannot register with nfnetlink.\n");
737 goto err_target;
738 }
739
740 pr_info("nf_tables_compat: (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>\n");
741
742 return ret;
743
744err_target:
745 nft_unregister_expr(&nft_target_type);
746err_match:
747 nft_unregister_expr(&nft_match_type);
748 return ret;
749}
750
751static void __exit nft_compat_module_exit(void)
752{
753 nfnetlink_subsys_unregister(&nfnl_compat_subsys);
754 nft_unregister_expr(&nft_target_type);
755 nft_unregister_expr(&nft_match_type);
756 nft_match_release();
757 nft_target_release();
758}
759
760MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
761
762module_init(nft_compat_module_init);
763module_exit(nft_compat_module_exit);
764
765MODULE_LICENSE("GPL");
766MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
767MODULE_ALIAS_NFT_EXPR("match");
768MODULE_ALIAS_NFT_EXPR("target");
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
new file mode 100644
index 000000000000..c89ee486ce54
--- /dev/null
+++ b/net/netfilter/nft_counter.c
@@ -0,0 +1,113 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/seqlock.h>
15#include <linux/netlink.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter/nf_tables.h>
18#include <net/netfilter/nf_tables.h>
19
20struct nft_counter {
21 seqlock_t lock;
22 u64 bytes;
23 u64 packets;
24};
25
26static void nft_counter_eval(const struct nft_expr *expr,
27 struct nft_data data[NFT_REG_MAX + 1],
28 const struct nft_pktinfo *pkt)
29{
30 struct nft_counter *priv = nft_expr_priv(expr);
31
32 write_seqlock_bh(&priv->lock);
33 priv->bytes += pkt->skb->len;
34 priv->packets++;
35 write_sequnlock_bh(&priv->lock);
36}
37
38static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
39{
40 struct nft_counter *priv = nft_expr_priv(expr);
41 unsigned int seq;
42 u64 bytes;
43 u64 packets;
44
45 do {
46 seq = read_seqbegin(&priv->lock);
47 bytes = priv->bytes;
48 packets = priv->packets;
49 } while (read_seqretry(&priv->lock, seq));
50
51 if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
52 goto nla_put_failure;
53 if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
54 goto nla_put_failure;
55 return 0;
56
57nla_put_failure:
58 return -1;
59}
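/*
 * The counters are bumped from the packet path under write_seqlock_bh()
 * (see nft_counter_eval() above); the dump side re-reads bytes and packets
 * until read_seqretry() reports a stable snapshot, so it never blocks the
 * hot path and only retries when an update raced with the read.
 */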
60
61static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = {
62 [NFTA_COUNTER_PACKETS] = { .type = NLA_U64 },
63 [NFTA_COUNTER_BYTES] = { .type = NLA_U64 },
64};
65
66static int nft_counter_init(const struct nft_ctx *ctx,
67 const struct nft_expr *expr,
68 const struct nlattr * const tb[])
69{
70 struct nft_counter *priv = nft_expr_priv(expr);
71
72 if (tb[NFTA_COUNTER_PACKETS])
73 priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
74 if (tb[NFTA_COUNTER_BYTES])
75 priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
76
77 seqlock_init(&priv->lock);
78 return 0;
79}
80
81static struct nft_expr_type nft_counter_type;
82static const struct nft_expr_ops nft_counter_ops = {
83 .type = &nft_counter_type,
84 .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)),
85 .eval = nft_counter_eval,
86 .init = nft_counter_init,
87 .dump = nft_counter_dump,
88};
89
90static struct nft_expr_type nft_counter_type __read_mostly = {
91 .name = "counter",
92 .ops = &nft_counter_ops,
93 .policy = nft_counter_policy,
94 .maxattr = NFTA_COUNTER_MAX,
95 .owner = THIS_MODULE,
96};
97
98static int __init nft_counter_module_init(void)
99{
100 return nft_register_expr(&nft_counter_type);
101}
102
103static void __exit nft_counter_module_exit(void)
104{
105 nft_unregister_expr(&nft_counter_type);
106}
107
108module_init(nft_counter_module_init);
109module_exit(nft_counter_module_exit);
110
111MODULE_LICENSE("GPL");
112MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
113MODULE_ALIAS_NFT_EXPR("counter");
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
new file mode 100644
index 000000000000..955f4e6e7089
--- /dev/null
+++ b/net/netfilter/nft_ct.c
@@ -0,0 +1,258 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables.h>
18#include <net/netfilter/nf_conntrack.h>
19#include <net/netfilter/nf_conntrack_tuple.h>
20#include <net/netfilter/nf_conntrack_helper.h>
21
22struct nft_ct {
23 enum nft_ct_keys key:8;
24 enum ip_conntrack_dir dir:8;
25 enum nft_registers dreg:8;
26 uint8_t family;
27};
28
29static void nft_ct_eval(const struct nft_expr *expr,
30 struct nft_data data[NFT_REG_MAX + 1],
31 const struct nft_pktinfo *pkt)
32{
33 const struct nft_ct *priv = nft_expr_priv(expr);
34 struct nft_data *dest = &data[priv->dreg];
35 enum ip_conntrack_info ctinfo;
36 const struct nf_conn *ct;
37 const struct nf_conn_help *help;
38 const struct nf_conntrack_tuple *tuple;
39 const struct nf_conntrack_helper *helper;
40 long diff;
41 unsigned int state;
42
43 ct = nf_ct_get(pkt->skb, &ctinfo);
44
45 switch (priv->key) {
46 case NFT_CT_STATE:
47 if (ct == NULL)
48 state = NF_CT_STATE_INVALID_BIT;
49 else if (nf_ct_is_untracked(ct))
50 state = NF_CT_STATE_UNTRACKED_BIT;
51 else
52 state = NF_CT_STATE_BIT(ctinfo);
53 dest->data[0] = state;
54 return;
55 }
56
57 if (ct == NULL)
58 goto err;
59
60 switch (priv->key) {
61 case NFT_CT_DIRECTION:
62 dest->data[0] = CTINFO2DIR(ctinfo);
63 return;
64 case NFT_CT_STATUS:
65 dest->data[0] = ct->status;
66 return;
67#ifdef CONFIG_NF_CONNTRACK_MARK
68 case NFT_CT_MARK:
69 dest->data[0] = ct->mark;
70 return;
71#endif
72#ifdef CONFIG_NF_CONNTRACK_SECMARK
73 case NFT_CT_SECMARK:
74 dest->data[0] = ct->secmark;
75 return;
76#endif
77 case NFT_CT_EXPIRATION:
78		diff = (long)ct->timeout.expires - (long)jiffies;
79 if (diff < 0)
80 diff = 0;
81 dest->data[0] = jiffies_to_msecs(diff);
82 return;
83 case NFT_CT_HELPER:
84 if (ct->master == NULL)
85 goto err;
86 help = nfct_help(ct->master);
87 if (help == NULL)
88 goto err;
89 helper = rcu_dereference(help->helper);
90 if (helper == NULL)
91 goto err;
92 if (strlen(helper->name) >= sizeof(dest->data))
93 goto err;
94 strncpy((char *)dest->data, helper->name, sizeof(dest->data));
95 return;
96 }
97
98 tuple = &ct->tuplehash[priv->dir].tuple;
99 switch (priv->key) {
100 case NFT_CT_L3PROTOCOL:
101 dest->data[0] = nf_ct_l3num(ct);
102 return;
103 case NFT_CT_SRC:
104 memcpy(dest->data, tuple->src.u3.all,
105 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
106 return;
107 case NFT_CT_DST:
108 memcpy(dest->data, tuple->dst.u3.all,
109 nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
110 return;
111 case NFT_CT_PROTOCOL:
112 dest->data[0] = nf_ct_protonum(ct);
113 return;
114 case NFT_CT_PROTO_SRC:
115 dest->data[0] = (__force __u16)tuple->src.u.all;
116 return;
117 case NFT_CT_PROTO_DST:
118 dest->data[0] = (__force __u16)tuple->dst.u.all;
119 return;
120 }
121 return;
122err:
123 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
124}
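/*
 * In the evaluation above NFT_CT_STATE is handled before the ct == NULL
 * check so that packets without (or with an untracked) conntrack entry
 * still produce a usable state bit.  Every other key requires a conntrack
 * entry, and the direction-dependent keys (NFT_CT_SRC, NFT_CT_DST,
 * NFT_CT_PROTO_*) read the tuple selected by priv->dir.  Any failure sets
 * NFT_BREAK in the verdict register, which stops this rule and lets
 * evaluation continue with the next one.
 */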
125
126static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
127 [NFTA_CT_DREG] = { .type = NLA_U32 },
128 [NFTA_CT_KEY] = { .type = NLA_U32 },
129 [NFTA_CT_DIRECTION] = { .type = NLA_U8 },
130};
131
132static int nft_ct_init(const struct nft_ctx *ctx,
133 const struct nft_expr *expr,
134 const struct nlattr * const tb[])
135{
136 struct nft_ct *priv = nft_expr_priv(expr);
137 int err;
138
139 if (tb[NFTA_CT_DREG] == NULL ||
140 tb[NFTA_CT_KEY] == NULL)
141 return -EINVAL;
142
143 priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
144 if (tb[NFTA_CT_DIRECTION] != NULL) {
145 priv->dir = nla_get_u8(tb[NFTA_CT_DIRECTION]);
146 switch (priv->dir) {
147 case IP_CT_DIR_ORIGINAL:
148 case IP_CT_DIR_REPLY:
149 break;
150 default:
151 return -EINVAL;
152 }
153 }
154
155 switch (priv->key) {
156 case NFT_CT_STATE:
157 case NFT_CT_DIRECTION:
158 case NFT_CT_STATUS:
159#ifdef CONFIG_NF_CONNTRACK_MARK
160 case NFT_CT_MARK:
161#endif
162#ifdef CONFIG_NF_CONNTRACK_SECMARK
163 case NFT_CT_SECMARK:
164#endif
165 case NFT_CT_EXPIRATION:
166 case NFT_CT_HELPER:
167 if (tb[NFTA_CT_DIRECTION] != NULL)
168 return -EINVAL;
169 break;
170 case NFT_CT_PROTOCOL:
171 case NFT_CT_SRC:
172 case NFT_CT_DST:
173 case NFT_CT_PROTO_SRC:
174 case NFT_CT_PROTO_DST:
175 if (tb[NFTA_CT_DIRECTION] == NULL)
176 return -EINVAL;
177 break;
178 default:
179 return -EOPNOTSUPP;
180 }
181
182 err = nf_ct_l3proto_try_module_get(ctx->afi->family);
183 if (err < 0)
184 return err;
185 priv->family = ctx->afi->family;
186
187 priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG]));
188 err = nft_validate_output_register(priv->dreg);
189 if (err < 0)
190 goto err1;
191
192 err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
193 if (err < 0)
194 goto err1;
195 return 0;
196
197err1:
198 nf_ct_l3proto_module_put(ctx->afi->family);
199 return err;
200}
201
202static void nft_ct_destroy(const struct nft_expr *expr)
203{
204 struct nft_ct *priv = nft_expr_priv(expr);
205
206 nf_ct_l3proto_module_put(priv->family);
207}
208
209static int nft_ct_dump(struct sk_buff *skb, const struct nft_expr *expr)
210{
211 const struct nft_ct *priv = nft_expr_priv(expr);
212
213 if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg)))
214 goto nla_put_failure;
215 if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
216 goto nla_put_failure;
217 if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
218 goto nla_put_failure;
219 return 0;
220
221nla_put_failure:
222 return -1;
223}
224
225static struct nft_expr_type nft_ct_type;
226static const struct nft_expr_ops nft_ct_ops = {
227 .type = &nft_ct_type,
228 .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
229 .eval = nft_ct_eval,
230 .init = nft_ct_init,
231 .destroy = nft_ct_destroy,
232 .dump = nft_ct_dump,
233};
234
235static struct nft_expr_type nft_ct_type __read_mostly = {
236 .name = "ct",
237 .ops = &nft_ct_ops,
238 .policy = nft_ct_policy,
239 .maxattr = NFTA_CT_MAX,
240 .owner = THIS_MODULE,
241};
242
243static int __init nft_ct_module_init(void)
244{
245 return nft_register_expr(&nft_ct_type);
246}
247
248static void __exit nft_ct_module_exit(void)
249{
250 nft_unregister_expr(&nft_ct_type);
251}
252
253module_init(nft_ct_module_init);
254module_exit(nft_ct_module_exit);
255
256MODULE_LICENSE("GPL");
257MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
258MODULE_ALIAS_NFT_EXPR("ct");
diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c
new file mode 100644
index 000000000000..b6eed4d5a096
--- /dev/null
+++ b/net/netfilter/nft_expr_template.c
@@ -0,0 +1,94 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/netlink.h>
14#include <linux/netfilter.h>
15#include <linux/netfilter/nf_tables.h>
16#include <net/netfilter/nf_tables.h>
17
18struct nft_template {
19
20};
21
22static void nft_template_eval(const struct nft_expr *expr,
23 struct nft_data data[NFT_REG_MAX + 1],
24 const struct nft_pktinfo *pkt)
25{
26 struct nft_template *priv = nft_expr_priv(expr);
27
28}
29
30static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = {
31 [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 },
32};
33
34static int nft_template_init(const struct nft_ctx *ctx,
35 const struct nft_expr *expr,
36 const struct nlattr * const tb[])
37{
38 struct nft_template *priv = nft_expr_priv(expr);
39
40 return 0;
41}
42
43static void nft_template_destroy(const struct nft_ctx *ctx,
44 const struct nft_expr *expr)
45{
46 struct nft_template *priv = nft_expr_priv(expr);
47
48}
49
50static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr)
51{
52 const struct nft_template *priv = nft_expr_priv(expr);
53
54	if (nla_put_be32(skb, NFTA_TEMPLATE_ATTR, priv->field))
		goto nla_put_failure;
55 return 0;
56
57nla_put_failure:
58 return -1;
59}
60
61static struct nft_expr_type nft_template_type;
62static const struct nft_expr_ops nft_template_ops = {
63 .type = &nft_template_type,
64 .size = NFT_EXPR_SIZE(sizeof(struct nft_template)),
65 .eval = nft_template_eval,
66 .init = nft_template_init,
67 .destroy = nft_template_destroy,
68 .dump = nft_template_dump,
69};
70
71static struct nft_expr_type nft_template_type __read_mostly = {
72 .name = "template",
73 .ops = &nft_template_ops,
74 .policy = nft_template_policy,
75 .maxattr = NFTA_TEMPLATE_MAX,
76 .owner = THIS_MODULE,
77};
78
79static int __init nft_template_module_init(void)
80{
81 return nft_register_expr(&nft_template_type);
82}
83
84static void __exit nft_template_module_exit(void)
85{
86 nft_unregister_expr(&nft_template_type);
87}
88
89module_init(nft_template_module_init);
90module_exit(nft_template_module_exit);
91
92MODULE_LICENSE("GPL");
93MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
94MODULE_ALIAS_NFT_EXPR("template");
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
new file mode 100644
index 000000000000..8e0bb75e7c51
--- /dev/null
+++ b/net/netfilter/nft_exthdr.c
@@ -0,0 +1,133 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables.h>
18// FIXME:
19#include <net/ipv6.h>
20
21struct nft_exthdr {
22 u8 type;
23 u8 offset;
24 u8 len;
25 enum nft_registers dreg:8;
26};
27
28static void nft_exthdr_eval(const struct nft_expr *expr,
29 struct nft_data data[NFT_REG_MAX + 1],
30 const struct nft_pktinfo *pkt)
31{
32 struct nft_exthdr *priv = nft_expr_priv(expr);
33 struct nft_data *dest = &data[priv->dreg];
34 unsigned int offset;
35 int err;
36
37 err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
38 if (err < 0)
39 goto err;
40 offset += priv->offset;
41
42 if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0)
43 goto err;
44 return;
45err:
46 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
47}
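/*
 * ipv6_find_hdr() walks the IPv6 extension header chain looking for the
 * header type in priv->type and, on success, leaves the offset of that
 * header in 'offset'.  priv->offset is then added on top, so priv->len
 * bytes relative to the located header are copied into the destination
 * register; a missing header or a short packet yields NFT_BREAK.
 */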
48
49static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
50 [NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
51 [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
52 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
53 [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
54};
55
56static int nft_exthdr_init(const struct nft_ctx *ctx,
57 const struct nft_expr *expr,
58 const struct nlattr * const tb[])
59{
60 struct nft_exthdr *priv = nft_expr_priv(expr);
61 int err;
62
63 if (tb[NFTA_EXTHDR_DREG] == NULL ||
64 tb[NFTA_EXTHDR_TYPE] == NULL ||
65 tb[NFTA_EXTHDR_OFFSET] == NULL ||
66 tb[NFTA_EXTHDR_LEN] == NULL)
67 return -EINVAL;
68
69 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
70 priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
71 priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
72 if (priv->len == 0 ||
73 priv->len > FIELD_SIZEOF(struct nft_data, data))
74 return -EINVAL;
75
76 priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG]));
77 err = nft_validate_output_register(priv->dreg);
78 if (err < 0)
79 return err;
80 return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
81}
82
83static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr)
84{
85 const struct nft_exthdr *priv = nft_expr_priv(expr);
86
87 if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg)))
88 goto nla_put_failure;
89 if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
90 goto nla_put_failure;
91 if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
92 goto nla_put_failure;
93 if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
94 goto nla_put_failure;
95 return 0;
96
97nla_put_failure:
98 return -1;
99}
100
101static struct nft_expr_type nft_exthdr_type;
102static const struct nft_expr_ops nft_exthdr_ops = {
103 .type = &nft_exthdr_type,
104 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
105 .eval = nft_exthdr_eval,
106 .init = nft_exthdr_init,
107 .dump = nft_exthdr_dump,
108};
109
110static struct nft_expr_type nft_exthdr_type __read_mostly = {
111 .name = "exthdr",
112 .ops = &nft_exthdr_ops,
113 .policy = nft_exthdr_policy,
114 .maxattr = NFTA_EXTHDR_MAX,
115 .owner = THIS_MODULE,
116};
117
118static int __init nft_exthdr_module_init(void)
119{
120 return nft_register_expr(&nft_exthdr_type);
121}
122
123static void __exit nft_exthdr_module_exit(void)
124{
125 nft_unregister_expr(&nft_exthdr_type);
126}
127
128module_init(nft_exthdr_module_init);
129module_exit(nft_exthdr_module_exit);
130
131MODULE_LICENSE("GPL");
132MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
133MODULE_ALIAS_NFT_EXPR("exthdr");
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
new file mode 100644
index 000000000000..3d3f8fce10a5
--- /dev/null
+++ b/net/netfilter/nft_hash.c
@@ -0,0 +1,231 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/list.h>
15#include <linux/jhash.h>
16#include <linux/netlink.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables.h>
20
21struct nft_hash {
22 struct hlist_head *hash;
23 unsigned int hsize;
24};
25
26struct nft_hash_elem {
27 struct hlist_node hnode;
28 struct nft_data key;
29 struct nft_data data[];
30};
31
32static u32 nft_hash_rnd __read_mostly;
33static bool nft_hash_rnd_initted __read_mostly;
34
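/*
 * Bucket selection below avoids a modulo operation: jhash() produces a
 * 32-bit value h and ((u64)h * hsize) >> 32 scales it into [0, hsize),
 * i.e. it computes floor(h / 2^32 * hsize).  This only assumes hsize fits
 * in 32 bits and spreads keys as evenly as h % hsize would.
 */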
35static unsigned int nft_hash_data(const struct nft_data *data,
36 unsigned int hsize, unsigned int len)
37{
38 unsigned int h;
39
40 h = jhash(data->data, len, nft_hash_rnd);
41 return ((u64)h * hsize) >> 32;
42}
43
44static bool nft_hash_lookup(const struct nft_set *set,
45 const struct nft_data *key,
46 struct nft_data *data)
47{
48 const struct nft_hash *priv = nft_set_priv(set);
49 const struct nft_hash_elem *he;
50 unsigned int h;
51
52 h = nft_hash_data(key, priv->hsize, set->klen);
53 hlist_for_each_entry(he, &priv->hash[h], hnode) {
54 if (nft_data_cmp(&he->key, key, set->klen))
55 continue;
56 if (set->flags & NFT_SET_MAP)
57 nft_data_copy(data, he->data);
58 return true;
59 }
60 return false;
61}
62
63static void nft_hash_elem_destroy(const struct nft_set *set,
64 struct nft_hash_elem *he)
65{
66 nft_data_uninit(&he->key, NFT_DATA_VALUE);
67 if (set->flags & NFT_SET_MAP)
68 nft_data_uninit(he->data, set->dtype);
69 kfree(he);
70}
71
72static int nft_hash_insert(const struct nft_set *set,
73 const struct nft_set_elem *elem)
74{
75 struct nft_hash *priv = nft_set_priv(set);
76 struct nft_hash_elem *he;
77 unsigned int size, h;
78
79 if (elem->flags != 0)
80 return -EINVAL;
81
82 size = sizeof(*he);
83 if (set->flags & NFT_SET_MAP)
84 size += sizeof(he->data[0]);
85
86 he = kzalloc(size, GFP_KERNEL);
87 if (he == NULL)
88 return -ENOMEM;
89
90 nft_data_copy(&he->key, &elem->key);
91 if (set->flags & NFT_SET_MAP)
92 nft_data_copy(he->data, &elem->data);
93
94 h = nft_hash_data(&he->key, priv->hsize, set->klen);
95 hlist_add_head_rcu(&he->hnode, &priv->hash[h]);
96 return 0;
97}
98
99static void nft_hash_remove(const struct nft_set *set,
100 const struct nft_set_elem *elem)
101{
102 struct nft_hash_elem *he = elem->cookie;
103
104 hlist_del_rcu(&he->hnode);
105 kfree(he);
106}
107
108static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem)
109{
110 const struct nft_hash *priv = nft_set_priv(set);
111 struct nft_hash_elem *he;
112 unsigned int h;
113
114 h = nft_hash_data(&elem->key, priv->hsize, set->klen);
115 hlist_for_each_entry(he, &priv->hash[h], hnode) {
116 if (nft_data_cmp(&he->key, &elem->key, set->klen))
117 continue;
118
119 elem->cookie = he;
120 elem->flags = 0;
121 if (set->flags & NFT_SET_MAP)
122 nft_data_copy(&elem->data, he->data);
123 return 0;
124 }
125 return -ENOENT;
126}
127
128static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
129 struct nft_set_iter *iter)
130{
131 const struct nft_hash *priv = nft_set_priv(set);
132 const struct nft_hash_elem *he;
133 struct nft_set_elem elem;
134 unsigned int i;
135
136 for (i = 0; i < priv->hsize; i++) {
137 hlist_for_each_entry(he, &priv->hash[i], hnode) {
138 if (iter->count < iter->skip)
139 goto cont;
140
141 memcpy(&elem.key, &he->key, sizeof(elem.key));
142 if (set->flags & NFT_SET_MAP)
143 memcpy(&elem.data, he->data, sizeof(elem.data));
144 elem.flags = 0;
145
146 iter->err = iter->fn(ctx, set, iter, &elem);
147 if (iter->err < 0)
148 return;
149cont:
150 iter->count++;
151 }
152 }
153}
154
155static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
156{
157 return sizeof(struct nft_hash);
158}
159
160static int nft_hash_init(const struct nft_set *set,
161 const struct nlattr * const tb[])
162{
163 struct nft_hash *priv = nft_set_priv(set);
164 unsigned int cnt, i;
165
166 if (unlikely(!nft_hash_rnd_initted)) {
167 get_random_bytes(&nft_hash_rnd, 4);
168 nft_hash_rnd_initted = true;
169 }
170
171 /* Aim for a load factor of 0.75 */
172 // FIXME: temporarily broken until we have set descriptions
173 cnt = 100;
174 cnt = cnt * 4 / 3;
175
176 priv->hash = kcalloc(cnt, sizeof(struct hlist_head), GFP_KERNEL);
177 if (priv->hash == NULL)
178 return -ENOMEM;
179 priv->hsize = cnt;
180
181 for (i = 0; i < cnt; i++)
182 INIT_HLIST_HEAD(&priv->hash[i]);
183
184 return 0;
185}
186
187static void nft_hash_destroy(const struct nft_set *set)
188{
189 const struct nft_hash *priv = nft_set_priv(set);
190 const struct hlist_node *next;
191 struct nft_hash_elem *elem;
192 unsigned int i;
193
194 for (i = 0; i < priv->hsize; i++) {
195 hlist_for_each_entry_safe(elem, next, &priv->hash[i], hnode) {
196 hlist_del(&elem->hnode);
197 nft_hash_elem_destroy(set, elem);
198 }
199 }
200 kfree(priv->hash);
201}
202
203static struct nft_set_ops nft_hash_ops __read_mostly = {
204 .privsize = nft_hash_privsize,
205 .init = nft_hash_init,
206 .destroy = nft_hash_destroy,
207 .get = nft_hash_get,
208 .insert = nft_hash_insert,
209 .remove = nft_hash_remove,
210 .lookup = nft_hash_lookup,
211 .walk = nft_hash_walk,
212 .features = NFT_SET_MAP,
213 .owner = THIS_MODULE,
214};
215
216static int __init nft_hash_module_init(void)
217{
218 return nft_register_set(&nft_hash_ops);
219}
220
221static void __exit nft_hash_module_exit(void)
222{
223 nft_unregister_set(&nft_hash_ops);
224}
225
226module_init(nft_hash_module_init);
227module_exit(nft_hash_module_exit);
228
229MODULE_LICENSE("GPL");
230MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
231MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
new file mode 100644
index 000000000000..f169501f1ad4
--- /dev/null
+++ b/net/netfilter/nft_immediate.c
@@ -0,0 +1,132 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables_core.h>
18#include <net/netfilter/nf_tables.h>
19
20struct nft_immediate_expr {
21 struct nft_data data;
22 enum nft_registers dreg:8;
23 u8 dlen;
24};
25
26static void nft_immediate_eval(const struct nft_expr *expr,
27 struct nft_data data[NFT_REG_MAX + 1],
28 const struct nft_pktinfo *pkt)
29{
30 const struct nft_immediate_expr *priv = nft_expr_priv(expr);
31
32 nft_data_copy(&data[priv->dreg], &priv->data);
33}
34
35static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
36 [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 },
37 [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED },
38};
39
40static int nft_immediate_init(const struct nft_ctx *ctx,
41 const struct nft_expr *expr,
42 const struct nlattr * const tb[])
43{
44 struct nft_immediate_expr *priv = nft_expr_priv(expr);
45 struct nft_data_desc desc;
46 int err;
47
48 if (tb[NFTA_IMMEDIATE_DREG] == NULL ||
49 tb[NFTA_IMMEDIATE_DATA] == NULL)
50 return -EINVAL;
51
52 priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG]));
53 err = nft_validate_output_register(priv->dreg);
54 if (err < 0)
55 return err;
56
57 err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]);
58 if (err < 0)
59 return err;
60 priv->dlen = desc.len;
61
62 err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type);
63 if (err < 0)
64 goto err1;
65
66 return 0;
67
68err1:
69 nft_data_uninit(&priv->data, desc.type);
70 return err;
71}
72
73static void nft_immediate_destroy(const struct nft_expr *expr)
74{
75 const struct nft_immediate_expr *priv = nft_expr_priv(expr);
76 return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg));
77}
78
79static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
80{
81 const struct nft_immediate_expr *priv = nft_expr_priv(expr);
82
83 if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg)))
84 goto nla_put_failure;
85
86 return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data,
87 nft_dreg_to_type(priv->dreg), priv->dlen);
88
89nla_put_failure:
90 return -1;
91}
92
93static int nft_immediate_validate(const struct nft_ctx *ctx,
94 const struct nft_expr *expr,
95 const struct nft_data **data)
96{
97 const struct nft_immediate_expr *priv = nft_expr_priv(expr);
98
99 if (priv->dreg == NFT_REG_VERDICT)
100 *data = &priv->data;
101
102 return 0;
103}
104
105static struct nft_expr_type nft_imm_type;
106static const struct nft_expr_ops nft_imm_ops = {
107 .type = &nft_imm_type,
108 .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
109 .eval = nft_immediate_eval,
110 .init = nft_immediate_init,
111 .destroy = nft_immediate_destroy,
112 .dump = nft_immediate_dump,
113 .validate = nft_immediate_validate,
114};
115
116static struct nft_expr_type nft_imm_type __read_mostly = {
117 .name = "immediate",
118 .ops = &nft_imm_ops,
119 .policy = nft_immediate_policy,
120 .maxattr = NFTA_IMMEDIATE_MAX,
121 .owner = THIS_MODULE,
122};
123
124int __init nft_immediate_module_init(void)
125{
126 return nft_register_expr(&nft_imm_type);
127}
128
129void nft_immediate_module_exit(void)
130{
131 nft_unregister_expr(&nft_imm_type);
132}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
new file mode 100644
index 000000000000..85da5bd02f64
--- /dev/null
+++ b/net/netfilter/nft_limit.c
@@ -0,0 +1,119 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/spinlock.h>
15#include <linux/netlink.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter/nf_tables.h>
18#include <net/netfilter/nf_tables.h>
19
20static DEFINE_SPINLOCK(limit_lock);
21
22struct nft_limit {
23 u64 tokens;
24 u64 rate;
25 u64 unit;
26 unsigned long stamp;
27};
28
29static void nft_limit_eval(const struct nft_expr *expr,
30 struct nft_data data[NFT_REG_MAX + 1],
31 const struct nft_pktinfo *pkt)
32{
33 struct nft_limit *priv = nft_expr_priv(expr);
34
35 spin_lock_bh(&limit_lock);
36 if (time_after_eq(jiffies, priv->stamp)) {
37 priv->tokens = priv->rate;
38 priv->stamp = jiffies + priv->unit * HZ;
39 }
40
41 if (priv->tokens >= 1) {
42 priv->tokens--;
43 spin_unlock_bh(&limit_lock);
44 return;
45 }
46 spin_unlock_bh(&limit_lock);
47
48 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
49}
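/*
 * This is a fixed-window limiter rather than a classic token bucket: up to
 * 'rate' packets are allowed per 'unit' seconds and the budget is only
 * refilled once the whole window has elapsed.  For example, with
 * NFTA_LIMIT_RATE = 10 and NFTA_LIMIT_UNIT = 60, at most 10 packets match
 * in each 60 second window and the rest are answered with NFT_BREAK.  Note
 * that the single global limit_lock serialises all limit expressions.
 */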
50
51static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
52 [NFTA_LIMIT_RATE] = { .type = NLA_U64 },
53 [NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
54};
55
56static int nft_limit_init(const struct nft_ctx *ctx,
57 const struct nft_expr *expr,
58 const struct nlattr * const tb[])
59{
60 struct nft_limit *priv = nft_expr_priv(expr);
61
62 if (tb[NFTA_LIMIT_RATE] == NULL ||
63 tb[NFTA_LIMIT_UNIT] == NULL)
64 return -EINVAL;
65
66 priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
67 priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
68 priv->stamp = jiffies + priv->unit * HZ;
69 priv->tokens = priv->rate;
70 return 0;
71}
72
73static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
74{
75 const struct nft_limit *priv = nft_expr_priv(expr);
76
77 if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
78 goto nla_put_failure;
79 if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
80 goto nla_put_failure;
81 return 0;
82
83nla_put_failure:
84 return -1;
85}
86
87static struct nft_expr_type nft_limit_type;
88static const struct nft_expr_ops nft_limit_ops = {
89 .type = &nft_limit_type,
90 .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
91 .eval = nft_limit_eval,
92 .init = nft_limit_init,
93 .dump = nft_limit_dump,
94};
95
96static struct nft_expr_type nft_limit_type __read_mostly = {
97 .name = "limit",
98 .ops = &nft_limit_ops,
99 .policy = nft_limit_policy,
100 .maxattr = NFTA_LIMIT_MAX,
101 .owner = THIS_MODULE,
102};
103
104static int __init nft_limit_module_init(void)
105{
106 return nft_register_expr(&nft_limit_type);
107}
108
109static void __exit nft_limit_module_exit(void)
110{
111 nft_unregister_expr(&nft_limit_type);
112}
113
114module_init(nft_limit_module_init);
115module_exit(nft_limit_module_exit);
116
117MODULE_LICENSE("GPL");
118MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
119MODULE_ALIAS_NFT_EXPR("limit");
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
new file mode 100644
index 000000000000..57cad072a13e
--- /dev/null
+++ b/net/netfilter/nft_log.c
@@ -0,0 +1,146 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables.h>
18#include <net/netfilter/nf_log.h>
19#include <linux/netdevice.h>
20
21static const char *nft_log_null_prefix = "";
22
23struct nft_log {
24 struct nf_loginfo loginfo;
25 char *prefix;
26 int family;
27};
28
29static void nft_log_eval(const struct nft_expr *expr,
30 struct nft_data data[NFT_REG_MAX + 1],
31 const struct nft_pktinfo *pkt)
32{
33 const struct nft_log *priv = nft_expr_priv(expr);
34 struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
35
36 nf_log_packet(net, priv->family, pkt->hooknum, pkt->skb, pkt->in,
37 pkt->out, &priv->loginfo, "%s", priv->prefix);
38}
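/*
 * The loginfo is always initialised with type NF_LOG_TYPE_ULOG (see
 * nft_log_init() below), so NFTA_LOG_GROUP, NFTA_LOG_SNAPLEN and
 * NFTA_LOG_QTHRESHOLD fill the u.ulog parameters; nf_log_packet() then
 * hands the packet to whichever logging backend is bound for this
 * protocol family in the current network namespace.
 */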
39
40static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = {
41 [NFTA_LOG_GROUP] = { .type = NLA_U16 },
42 [NFTA_LOG_PREFIX] = { .type = NLA_STRING },
43 [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 },
44 [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 },
45};
46
47static int nft_log_init(const struct nft_ctx *ctx,
48 const struct nft_expr *expr,
49 const struct nlattr * const tb[])
50{
51 struct nft_log *priv = nft_expr_priv(expr);
52 struct nf_loginfo *li = &priv->loginfo;
53 const struct nlattr *nla;
54
55 priv->family = ctx->afi->family;
56
57 nla = tb[NFTA_LOG_PREFIX];
58 if (nla != NULL) {
59 priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
60 if (priv->prefix == NULL)
61 return -ENOMEM;
62 nla_strlcpy(priv->prefix, nla, nla_len(nla) + 1);
63 } else
64 priv->prefix = (char *)nft_log_null_prefix;
65
66 li->type = NF_LOG_TYPE_ULOG;
67 if (tb[NFTA_LOG_GROUP] != NULL)
68 li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
69
70 if (tb[NFTA_LOG_SNAPLEN] != NULL)
71 li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
72 if (tb[NFTA_LOG_QTHRESHOLD] != NULL) {
73 li->u.ulog.qthreshold =
74 ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD]));
75 }
76
77 return 0;
78}
79
80static void nft_log_destroy(const struct nft_expr *expr)
81{
82 struct nft_log *priv = nft_expr_priv(expr);
83
84 if (priv->prefix != nft_log_null_prefix)
85 kfree(priv->prefix);
86}
87
88static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
89{
90 const struct nft_log *priv = nft_expr_priv(expr);
91 const struct nf_loginfo *li = &priv->loginfo;
92
93 if (priv->prefix != nft_log_null_prefix)
94 if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix))
95 goto nla_put_failure;
96 if (li->u.ulog.group)
97 if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
98 goto nla_put_failure;
99 if (li->u.ulog.copy_len)
100 if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
101 htonl(li->u.ulog.copy_len)))
102 goto nla_put_failure;
103 if (li->u.ulog.qthreshold)
104 if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD,
105 htons(li->u.ulog.qthreshold)))
106 goto nla_put_failure;
107 return 0;
108
109nla_put_failure:
110 return -1;
111}
112
113static struct nft_expr_type nft_log_type;
114static const struct nft_expr_ops nft_log_ops = {
115 .type = &nft_log_type,
116 .size = NFT_EXPR_SIZE(sizeof(struct nft_log)),
117 .eval = nft_log_eval,
118 .init = nft_log_init,
119 .destroy = nft_log_destroy,
120 .dump = nft_log_dump,
121};
122
123static struct nft_expr_type nft_log_type __read_mostly = {
124 .name = "log",
125 .ops = &nft_log_ops,
126 .policy = nft_log_policy,
127 .maxattr = NFTA_LOG_MAX,
128 .owner = THIS_MODULE,
129};
130
131static int __init nft_log_module_init(void)
132{
133 return nft_register_expr(&nft_log_type);
134}
135
136static void __exit nft_log_module_exit(void)
137{
138 nft_unregister_expr(&nft_log_type);
139}
140
141module_init(nft_log_module_init);
142module_exit(nft_log_module_exit);
143
144MODULE_LICENSE("GPL");
145MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
146MODULE_ALIAS_NFT_EXPR("log");
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
new file mode 100644
index 000000000000..8a6116b75b5a
--- /dev/null
+++ b/net/netfilter/nft_lookup.c
@@ -0,0 +1,141 @@
1/*
2 * Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/list.h>
14#include <linux/rbtree.h>
15#include <linux/netlink.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter/nf_tables.h>
18#include <net/netfilter/nf_tables.h>
19
20struct nft_lookup {
21 struct nft_set *set;
22 enum nft_registers sreg:8;
23 enum nft_registers dreg:8;
24 struct nft_set_binding binding;
25};
26
27static void nft_lookup_eval(const struct nft_expr *expr,
28 struct nft_data data[NFT_REG_MAX + 1],
29 const struct nft_pktinfo *pkt)
30{
31 const struct nft_lookup *priv = nft_expr_priv(expr);
32 const struct nft_set *set = priv->set;
33
34 if (set->ops->lookup(set, &data[priv->sreg], &data[priv->dreg]))
35 return;
36 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
37}
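/*
 * The lookup key is read from the source register.  For plain sets a hit
 * simply lets the rule continue; for maps (NFT_SET_MAP) the bound data is
 * copied into the destination register, which may itself be a verdict
 * when dreg is NFT_REG_VERDICT.  A miss sets NFT_BREAK, so the rule just
 * does not match.
 */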
38
39static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = {
40 [NFTA_LOOKUP_SET] = { .type = NLA_STRING },
41 [NFTA_LOOKUP_SREG] = { .type = NLA_U32 },
42 [NFTA_LOOKUP_DREG] = { .type = NLA_U32 },
43};
44
45static int nft_lookup_init(const struct nft_ctx *ctx,
46 const struct nft_expr *expr,
47 const struct nlattr * const tb[])
48{
49 struct nft_lookup *priv = nft_expr_priv(expr);
50 struct nft_set *set;
51 int err;
52
53 if (tb[NFTA_LOOKUP_SET] == NULL ||
54 tb[NFTA_LOOKUP_SREG] == NULL)
55 return -EINVAL;
56
57 set = nf_tables_set_lookup(ctx->table, tb[NFTA_LOOKUP_SET]);
58 if (IS_ERR(set))
59 return PTR_ERR(set);
60
61 priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG]));
62 err = nft_validate_input_register(priv->sreg);
63 if (err < 0)
64 return err;
65
66 if (tb[NFTA_LOOKUP_DREG] != NULL) {
67 if (!(set->flags & NFT_SET_MAP))
68 return -EINVAL;
69
70 priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG]));
71 err = nft_validate_output_register(priv->dreg);
72 if (err < 0)
73 return err;
74
75 if (priv->dreg == NFT_REG_VERDICT) {
76 if (set->dtype != NFT_DATA_VERDICT)
77 return -EINVAL;
78 } else if (set->dtype == NFT_DATA_VERDICT)
79 return -EINVAL;
80 } else if (set->flags & NFT_SET_MAP)
81 return -EINVAL;
82
83 err = nf_tables_bind_set(ctx, set, &priv->binding);
84 if (err < 0)
85 return err;
86
87 priv->set = set;
88 return 0;
89}
90
91static void nft_lookup_destroy(const struct nft_expr *expr)
92{
93 struct nft_lookup *priv = nft_expr_priv(expr);
94
95 nf_tables_unbind_set(NULL, priv->set, &priv->binding);
96}
97
98static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr)
99{
100 const struct nft_lookup *priv = nft_expr_priv(expr);
101
102 if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name))
103 goto nla_put_failure;
104 if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg)))
105 goto nla_put_failure;
106 if (priv->set->flags & NFT_SET_MAP)
107 if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg)))
108 goto nla_put_failure;
109 return 0;
110
111nla_put_failure:
112 return -1;
113}
114
115static struct nft_expr_type nft_lookup_type;
116static const struct nft_expr_ops nft_lookup_ops = {
117 .type = &nft_lookup_type,
118 .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
119 .eval = nft_lookup_eval,
120 .init = nft_lookup_init,
121 .destroy = nft_lookup_destroy,
122 .dump = nft_lookup_dump,
123};
124
125static struct nft_expr_type nft_lookup_type __read_mostly = {
126 .name = "lookup",
127 .ops = &nft_lookup_ops,
128 .policy = nft_lookup_policy,
129 .maxattr = NFTA_LOOKUP_MAX,
130 .owner = THIS_MODULE,
131};
132
133int __init nft_lookup_module_init(void)
134{
135 return nft_register_expr(&nft_lookup_type);
136}
137
138void nft_lookup_module_exit(void)
139{
140 nft_unregister_expr(&nft_lookup_type);
141}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
new file mode 100644
index 000000000000..8c28220a90b3
--- /dev/null
+++ b/net/netfilter/nft_meta.c
@@ -0,0 +1,228 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/dst.h>
18#include <net/sock.h>
19#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
20#include <net/netfilter/nf_tables.h>
21
22struct nft_meta {
23 enum nft_meta_keys key:8;
24 enum nft_registers dreg:8;
25};
26
27static void nft_meta_eval(const struct nft_expr *expr,
28 struct nft_data data[NFT_REG_MAX + 1],
29 const struct nft_pktinfo *pkt)
30{
31 const struct nft_meta *priv = nft_expr_priv(expr);
32 const struct sk_buff *skb = pkt->skb;
33 const struct net_device *in = pkt->in, *out = pkt->out;
34 struct nft_data *dest = &data[priv->dreg];
35
36 switch (priv->key) {
37 case NFT_META_LEN:
38 dest->data[0] = skb->len;
39 break;
40 case NFT_META_PROTOCOL:
41 *(__be16 *)dest->data = skb->protocol;
42 break;
43 case NFT_META_PRIORITY:
44 dest->data[0] = skb->priority;
45 break;
46 case NFT_META_MARK:
47 dest->data[0] = skb->mark;
48 break;
49 case NFT_META_IIF:
50 if (in == NULL)
51 goto err;
52 dest->data[0] = in->ifindex;
53 break;
54 case NFT_META_OIF:
55 if (out == NULL)
56 goto err;
57 dest->data[0] = out->ifindex;
58 break;
59 case NFT_META_IIFNAME:
60 if (in == NULL)
61 goto err;
62 strncpy((char *)dest->data, in->name, sizeof(dest->data));
63 break;
64 case NFT_META_OIFNAME:
65 if (out == NULL)
66 goto err;
67 strncpy((char *)dest->data, out->name, sizeof(dest->data));
68 break;
69 case NFT_META_IIFTYPE:
70 if (in == NULL)
71 goto err;
72 *(u16 *)dest->data = in->type;
73 break;
74 case NFT_META_OIFTYPE:
75 if (out == NULL)
76 goto err;
77 *(u16 *)dest->data = out->type;
78 break;
79 case NFT_META_SKUID:
80 if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
81 goto err;
82
83 read_lock_bh(&skb->sk->sk_callback_lock);
84 if (skb->sk->sk_socket == NULL ||
85 skb->sk->sk_socket->file == NULL) {
86 read_unlock_bh(&skb->sk->sk_callback_lock);
87 goto err;
88 }
89
90 dest->data[0] =
91 from_kuid_munged(&init_user_ns,
92 skb->sk->sk_socket->file->f_cred->fsuid);
93 read_unlock_bh(&skb->sk->sk_callback_lock);
94 break;
95 case NFT_META_SKGID:
96 if (skb->sk == NULL || skb->sk->sk_state == TCP_TIME_WAIT)
97 goto err;
98
99 read_lock_bh(&skb->sk->sk_callback_lock);
100 if (skb->sk->sk_socket == NULL ||
101 skb->sk->sk_socket->file == NULL) {
102 read_unlock_bh(&skb->sk->sk_callback_lock);
103 goto err;
104 }
105 dest->data[0] =
106 from_kgid_munged(&init_user_ns,
107 skb->sk->sk_socket->file->f_cred->fsgid);
108 read_unlock_bh(&skb->sk->sk_callback_lock);
109 break;
110#ifdef CONFIG_NET_CLS_ROUTE
111 case NFT_META_RTCLASSID: {
112 const struct dst_entry *dst = skb_dst(skb);
113
114 if (dst == NULL)
115 goto err;
116 dest->data[0] = dst->tclassid;
117 break;
118 }
119#endif
120#ifdef CONFIG_NETWORK_SECMARK
121 case NFT_META_SECMARK:
122 dest->data[0] = skb->secmark;
123 break;
124#endif
125 default:
126 WARN_ON(1);
127 goto err;
128 }
129 return;
130
131err:
132 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
133}
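/*
 * NFT_META_SKUID/SKGID bail out for sockets in TCP_TIME_WAIT: a timewait
 * socket is a reduced structure without an attached struct socket, so no
 * file credentials can be read from it.  sk_callback_lock is taken so the
 * socket cannot be detached from its file while fsuid/fsgid are copied.
 */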
134
135static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
136 [NFTA_META_DREG] = { .type = NLA_U32 },
137 [NFTA_META_KEY] = { .type = NLA_U32 },
138};
139
140static int nft_meta_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
141 const struct nlattr * const tb[])
142{
143 struct nft_meta *priv = nft_expr_priv(expr);
144 int err;
145
146 if (tb[NFTA_META_DREG] == NULL ||
147 tb[NFTA_META_KEY] == NULL)
148 return -EINVAL;
149
150 priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
151 switch (priv->key) {
152 case NFT_META_LEN:
153 case NFT_META_PROTOCOL:
154 case NFT_META_PRIORITY:
155 case NFT_META_MARK:
156 case NFT_META_IIF:
157 case NFT_META_OIF:
158 case NFT_META_IIFNAME:
159 case NFT_META_OIFNAME:
160 case NFT_META_IIFTYPE:
161 case NFT_META_OIFTYPE:
162 case NFT_META_SKUID:
163 case NFT_META_SKGID:
164#ifdef CONFIG_NET_CLS_ROUTE
165 case NFT_META_RTCLASSID:
166#endif
167#ifdef CONFIG_NETWORK_SECMARK
168 case NFT_META_SECMARK:
169#endif
170 break;
171 default:
172 return -EOPNOTSUPP;
173 }
174
175 priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG]));
176 err = nft_validate_output_register(priv->dreg);
177 if (err < 0)
178 return err;
179 return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
180}
181
182static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
183{
184 const struct nft_meta *priv = nft_expr_priv(expr);
185
186 if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg)))
187 goto nla_put_failure;
188 if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key)))
189 goto nla_put_failure;
190 return 0;
191
192nla_put_failure:
193 return -1;
194}
195
196static struct nft_expr_type nft_meta_type;
197static const struct nft_expr_ops nft_meta_ops = {
198 .type = &nft_meta_type,
199 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
200 .eval = nft_meta_eval,
201 .init = nft_meta_init,
202 .dump = nft_meta_dump,
203};
204
205static struct nft_expr_type nft_meta_type __read_mostly = {
206 .name = "meta",
207 .ops = &nft_meta_ops,
208 .policy = nft_meta_policy,
209 .maxattr = NFTA_META_MAX,
210 .owner = THIS_MODULE,
211};
212
213static int __init nft_meta_module_init(void)
214{
215 return nft_register_expr(&nft_meta_type);
216}
217
218static void __exit nft_meta_module_exit(void)
219{
220 nft_unregister_expr(&nft_meta_type);
221}
222
223module_init(nft_meta_module_init);
224module_exit(nft_meta_module_exit);
225
226MODULE_LICENSE("GPL");
227MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
228MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_meta_target.c b/net/netfilter/nft_meta_target.c
new file mode 100644
index 000000000000..71177df75ffb
--- /dev/null
+++ b/net/netfilter/nft_meta_target.c
@@ -0,0 +1,117 @@
1/*
2 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/list.h>
14#include <linux/rbtree.h>
15#include <linux/netlink.h>
16#include <linux/netfilter.h>
17#include <linux/netfilter/nf_tables.h>
18#include <net/netfilter/nf_tables.h>
19
20struct nft_meta {
21 enum nft_meta_keys key;
22};
23
24static void nft_meta_eval(const struct nft_expr *expr,
25 struct nft_data *nfres,
26 struct nft_data *data,
27 const struct nft_pktinfo *pkt)
28{
29 const struct nft_meta *meta = nft_expr_priv(expr);
30 struct sk_buff *skb = pkt->skb;
31 u32 val = data->data[0];
32
33 switch (meta->key) {
34 case NFT_META_MARK:
35 skb->mark = val;
36 break;
37 case NFT_META_PRIORITY:
38 skb->priority = val;
39 break;
40 case NFT_META_NFTRACE:
41 skb->nf_trace = val;
42 break;
43#ifdef CONFIG_NETWORK_SECMARK
44 case NFT_META_SECMARK:
45 skb->secmark = val;
46 break;
47#endif
48 default:
49 WARN_ON(1);
50 }
51}
52
53static const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
54 [NFTA_META_KEY] = { .type = NLA_U32 },
55};
56
57static int nft_meta_init(const struct nft_expr *expr, struct nlattr *tb[])
58{
59 struct nft_meta *meta = nft_expr_priv(expr);
60
61 if (tb[NFTA_META_KEY] == NULL)
62 return -EINVAL;
63
64 meta->key = ntohl(nla_get_be32(tb[NFTA_META_KEY]));
65 switch (meta->key) {
66 case NFT_META_MARK:
67 case NFT_META_PRIORITY:
68 case NFT_META_NFTRACE:
69#ifdef CONFIG_NETWORK_SECMARK
70 case NFT_META_SECMARK:
71#endif
72 break;
73 default:
74 return -EINVAL;
75 }
76
77 return 0;
78}
79
80static int nft_meta_dump(struct sk_buff *skb, const struct nft_expr *expr)
81{
82 struct nft_meta *meta = nft_expr_priv(expr);
83
84	if (nla_put_be32(skb, NFTA_META_KEY, htonl(meta->key)))
		goto nla_put_failure;
85 return 0;
86
87nla_put_failure:
88 return -1;
89}
90
91static struct nft_expr_ops meta_target __read_mostly = {
92 .name = "meta",
93 .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
94 .owner = THIS_MODULE,
95 .eval = nft_meta_eval,
96 .init = nft_meta_init,
97 .dump = nft_meta_dump,
98 .policy = nft_meta_policy,
99 .maxattr = NFTA_META_MAX,
100};
101
102static int __init nft_meta_target_init(void)
103{
104 return nft_register_expr(&meta_target);
105}
106
107static void __exit nft_meta_target_exit(void)
108{
109 nft_unregister_expr(&meta_target);
110}
111
112module_init(nft_meta_target_init);
113module_exit(nft_meta_target_exit);
114
115MODULE_LICENSE("GPL");
116MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
117MODULE_ALIAS_NFT_EXPR("meta");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
new file mode 100644
index 000000000000..b0b87b2d2411
--- /dev/null
+++ b/net/netfilter/nft_nat.c
@@ -0,0 +1,220 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
4 * Copyright (c) 2012 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <linux/string.h>
17#include <linux/netlink.h>
18#include <linux/netfilter.h>
19#include <linux/netfilter_ipv4.h>
20#include <linux/netfilter/nfnetlink.h>
21#include <linux/netfilter/nf_tables.h>
22#include <net/netfilter/nf_conntrack.h>
23#include <net/netfilter/nf_nat.h>
24#include <net/netfilter/nf_nat_core.h>
25#include <net/netfilter/nf_tables.h>
26#include <net/netfilter/nf_nat_l3proto.h>
27#include <net/ip.h>
28
29struct nft_nat {
30 enum nft_registers sreg_addr_min:8;
31 enum nft_registers sreg_addr_max:8;
32 enum nft_registers sreg_proto_min:8;
33 enum nft_registers sreg_proto_max:8;
34 int family;
35 enum nf_nat_manip_type type;
36};
37
38static void nft_nat_eval(const struct nft_expr *expr,
39 struct nft_data data[NFT_REG_MAX + 1],
40 const struct nft_pktinfo *pkt)
41{
42 const struct nft_nat *priv = nft_expr_priv(expr);
43 enum ip_conntrack_info ctinfo;
44 struct nf_conn *ct = nf_ct_get(pkt->skb, &ctinfo);
45 struct nf_nat_range range;
46
47 memset(&range, 0, sizeof(range));
48 if (priv->sreg_addr_min) {
49 if (priv->family == AF_INET) {
50 range.min_addr.ip = data[priv->sreg_addr_min].data[0];
51 range.max_addr.ip = data[priv->sreg_addr_max].data[0];
52
53 } else {
54 memcpy(range.min_addr.ip6,
55 data[priv->sreg_addr_min].data,
56 sizeof(struct nft_data));
57 memcpy(range.max_addr.ip6,
58 data[priv->sreg_addr_max].data,
59 sizeof(struct nft_data));
60 }
61 range.flags |= NF_NAT_RANGE_MAP_IPS;
62 }
63
64 if (priv->sreg_proto_min) {
65 range.min_proto.all = data[priv->sreg_proto_min].data[0];
66 range.max_proto.all = data[priv->sreg_proto_max].data[0];
67 range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
68 }
69
70 data[NFT_REG_VERDICT].verdict =
71 nf_nat_setup_info(ct, &range, priv->type);
72}
73
74static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = {
75 [NFTA_NAT_TYPE] = { .type = NLA_U32 },
76 [NFTA_NAT_FAMILY] = { .type = NLA_U32 },
77 [NFTA_NAT_REG_ADDR_MIN] = { .type = NLA_U32 },
78 [NFTA_NAT_REG_ADDR_MAX] = { .type = NLA_U32 },
79 [NFTA_NAT_REG_PROTO_MIN] = { .type = NLA_U32 },
80 [NFTA_NAT_REG_PROTO_MAX] = { .type = NLA_U32 },
81};
82
83static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
84 const struct nlattr * const tb[])
85{
86 struct nft_nat *priv = nft_expr_priv(expr);
87 int err;
88
89 if (tb[NFTA_NAT_TYPE] == NULL)
90 return -EINVAL;
91
92 switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
93 case NFT_NAT_SNAT:
94 priv->type = NF_NAT_MANIP_SRC;
95 break;
96 case NFT_NAT_DNAT:
97 priv->type = NF_NAT_MANIP_DST;
98 break;
99 default:
100 return -EINVAL;
101 }
102
103 if (tb[NFTA_NAT_FAMILY] == NULL)
104 return -EINVAL;
105
106 priv->family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY]));
107 if (priv->family != AF_INET && priv->family != AF_INET6)
108 return -EINVAL;
109
110 if (tb[NFTA_NAT_REG_ADDR_MIN]) {
111 priv->sreg_addr_min = ntohl(nla_get_be32(
112 tb[NFTA_NAT_REG_ADDR_MIN]));
113 err = nft_validate_input_register(priv->sreg_addr_min);
114 if (err < 0)
115 return err;
116 }
117
118 if (tb[NFTA_NAT_REG_ADDR_MAX]) {
119 priv->sreg_addr_max = ntohl(nla_get_be32(
120 tb[NFTA_NAT_REG_ADDR_MAX]));
121 err = nft_validate_input_register(priv->sreg_addr_max);
122 if (err < 0)
123 return err;
124 } else
125 priv->sreg_addr_max = priv->sreg_addr_min;
126
127 if (tb[NFTA_NAT_REG_PROTO_MIN]) {
128 priv->sreg_proto_min = ntohl(nla_get_be32(
129 tb[NFTA_NAT_REG_PROTO_MIN]));
130 err = nft_validate_input_register(priv->sreg_proto_min);
131 if (err < 0)
132 return err;
133 }
134
135 if (tb[NFTA_NAT_REG_PROTO_MAX]) {
136 priv->sreg_proto_max = ntohl(nla_get_be32(
137 tb[NFTA_NAT_REG_PROTO_MAX]));
138 err = nft_validate_input_register(priv->sreg_proto_max);
139 if (err < 0)
140 return err;
141 } else
142 priv->sreg_proto_max = priv->sreg_proto_min;
143
144 return 0;
145}
146
147static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
148{
149 const struct nft_nat *priv = nft_expr_priv(expr);
150
151 switch (priv->type) {
152 case NF_NAT_MANIP_SRC:
153 if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_SNAT)))
154 goto nla_put_failure;
155 break;
156 case NF_NAT_MANIP_DST:
157 if (nla_put_be32(skb, NFTA_NAT_TYPE, htonl(NFT_NAT_DNAT)))
158 goto nla_put_failure;
159 break;
160 }
161
162 if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family)))
163 goto nla_put_failure;
164 if (nla_put_be32(skb,
165 NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min)))
166 goto nla_put_failure;
167 if (nla_put_be32(skb,
168 NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max)))
169 goto nla_put_failure;
170 if (nla_put_be32(skb,
171 NFTA_NAT_REG_PROTO_MIN, htonl(priv->sreg_proto_min)))
172 goto nla_put_failure;
173 if (nla_put_be32(skb,
174 NFTA_NAT_REG_PROTO_MAX, htonl(priv->sreg_proto_max)))
175 goto nla_put_failure;
176 return 0;
177
178nla_put_failure:
179 return -1;
180}
181
182static struct nft_expr_type nft_nat_type;
183static const struct nft_expr_ops nft_nat_ops = {
184 .type = &nft_nat_type,
185 .size = NFT_EXPR_SIZE(sizeof(struct nft_nat)),
186 .eval = nft_nat_eval,
187 .init = nft_nat_init,
188 .dump = nft_nat_dump,
189};
190
191static struct nft_expr_type nft_nat_type __read_mostly = {
192 .name = "nat",
193 .ops = &nft_nat_ops,
194 .policy = nft_nat_policy,
195 .maxattr = NFTA_NAT_MAX,
196 .owner = THIS_MODULE,
197};
198
199static int __init nft_nat_module_init(void)
200{
201 int err;
202
203 err = nft_register_expr(&nft_nat_type);
204 if (err < 0)
205 return err;
206
207 return 0;
208}
209
210static void __exit nft_nat_module_exit(void)
211{
212 nft_unregister_expr(&nft_nat_type);
213}
214
215module_init(nft_nat_module_init);
216module_exit(nft_nat_module_exit);
217
218MODULE_LICENSE("GPL");
219MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
220MODULE_ALIAS_NFT_EXPR("nat");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
new file mode 100644
index 000000000000..a2aeb318678f
--- /dev/null
+++ b/net/netfilter/nft_payload.c
@@ -0,0 +1,160 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/netlink.h>
15#include <linux/netfilter.h>
16#include <linux/netfilter/nf_tables.h>
17#include <net/netfilter/nf_tables_core.h>
18#include <net/netfilter/nf_tables.h>
19
20static void nft_payload_eval(const struct nft_expr *expr,
21 struct nft_data data[NFT_REG_MAX + 1],
22 const struct nft_pktinfo *pkt)
23{
24 const struct nft_payload *priv = nft_expr_priv(expr);
25 const struct sk_buff *skb = pkt->skb;
26 struct nft_data *dest = &data[priv->dreg];
27 int offset;
28
29 switch (priv->base) {
30 case NFT_PAYLOAD_LL_HEADER:
31 if (!skb_mac_header_was_set(skb))
32 goto err;
33 offset = skb_mac_header(skb) - skb->data;
34 break;
35 case NFT_PAYLOAD_NETWORK_HEADER:
36 offset = skb_network_offset(skb);
37 break;
38 case NFT_PAYLOAD_TRANSPORT_HEADER:
39 offset = pkt->xt.thoff;
40 break;
41 default:
42 BUG();
43 }
44 offset += priv->offset;
45
46 if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0)
47 goto err;
48 return;
49err:
50 data[NFT_REG_VERDICT].verdict = NFT_BREAK;
51}
52
53static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
54 [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
55 [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
56 [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
57 [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
58};
59
60static int nft_payload_init(const struct nft_ctx *ctx,
61 const struct nft_expr *expr,
62 const struct nlattr * const tb[])
63{
64 struct nft_payload *priv = nft_expr_priv(expr);
65 int err;
66
67 priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
68 priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
69 priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
70
71 priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG]));
72 err = nft_validate_output_register(priv->dreg);
73 if (err < 0)
74 return err;
75 return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE);
76}
77
78static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
79{
80 const struct nft_payload *priv = nft_expr_priv(expr);
81
82 if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) ||
83 nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
84 nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
85 nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)))
86 goto nla_put_failure;
87 return 0;
88
89nla_put_failure:
90 return -1;
91}
92
93static struct nft_expr_type nft_payload_type;
94static const struct nft_expr_ops nft_payload_ops = {
95 .type = &nft_payload_type,
96 .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
97 .eval = nft_payload_eval,
98 .init = nft_payload_init,
99 .dump = nft_payload_dump,
100};
101
102const struct nft_expr_ops nft_payload_fast_ops = {
103 .type = &nft_payload_type,
104 .size = NFT_EXPR_SIZE(sizeof(struct nft_payload)),
105 .eval = nft_payload_eval,
106 .init = nft_payload_init,
107 .dump = nft_payload_dump,
108};
109
110static const struct nft_expr_ops *
111nft_payload_select_ops(const struct nft_ctx *ctx,
112 const struct nlattr * const tb[])
113{
114 enum nft_payload_bases base;
115 unsigned int offset, len;
116
117 if (tb[NFTA_PAYLOAD_DREG] == NULL ||
118 tb[NFTA_PAYLOAD_BASE] == NULL ||
119 tb[NFTA_PAYLOAD_OFFSET] == NULL ||
120 tb[NFTA_PAYLOAD_LEN] == NULL)
121 return ERR_PTR(-EINVAL);
122
123 base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
124 switch (base) {
125 case NFT_PAYLOAD_LL_HEADER:
126 case NFT_PAYLOAD_NETWORK_HEADER:
127 case NFT_PAYLOAD_TRANSPORT_HEADER:
128 break;
129 default:
130 return ERR_PTR(-EOPNOTSUPP);
131 }
132
133 offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
134 len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
135 if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
136 return ERR_PTR(-EINVAL);
137
138 if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER)
139 return &nft_payload_fast_ops;
140 else
141 return &nft_payload_ops;
142}
143
144static struct nft_expr_type nft_payload_type __read_mostly = {
145 .name = "payload",
146 .select_ops = nft_payload_select_ops,
147 .policy = nft_payload_policy,
148 .maxattr = NFTA_PAYLOAD_MAX,
149 .owner = THIS_MODULE,
150};
151
152int __init nft_payload_module_init(void)
153{
154 return nft_register_expr(&nft_payload_type);
155}
156
157void nft_payload_module_exit(void)
158{
159 nft_unregister_expr(&nft_payload_type);
160}
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
new file mode 100644
index 000000000000..ca0c1b231bfe
--- /dev/null
+++ b/net/netfilter/nft_rbtree.c
@@ -0,0 +1,247 @@
1/*
2 * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * Development of this code funded by Astaro AG (http://www.astaro.com/)
9 */
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/list.h>
15#include <linux/rbtree.h>
16#include <linux/netlink.h>
17#include <linux/netfilter.h>
18#include <linux/netfilter/nf_tables.h>
19#include <net/netfilter/nf_tables.h>
20
21struct nft_rbtree {
22 struct rb_root root;
23};
24
25struct nft_rbtree_elem {
26 struct rb_node node;
27 u16 flags;
28 struct nft_data key;
29 struct nft_data data[];
30};
31
32static bool nft_rbtree_lookup(const struct nft_set *set,
33 const struct nft_data *key,
34 struct nft_data *data)
35{
36 const struct nft_rbtree *priv = nft_set_priv(set);
37 const struct nft_rbtree_elem *rbe, *interval = NULL;
38 const struct rb_node *parent = priv->root.rb_node;
39 int d;
40
41 while (parent != NULL) {
42 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
43
44 d = nft_data_cmp(&rbe->key, key, set->klen);
45 if (d < 0) {
46 parent = parent->rb_left;
47 interval = rbe;
48 } else if (d > 0)
49 parent = parent->rb_right;
50 else {
51found:
52 if (rbe->flags & NFT_SET_ELEM_INTERVAL_END)
53 goto out;
54 if (set->flags & NFT_SET_MAP)
55 nft_data_copy(data, rbe->data);
56 return true;
57 }
58 }
59
60 if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
61 rbe = interval;
62 goto found;
63 }
64out:
65 return false;
66}
67
68static void nft_rbtree_elem_destroy(const struct nft_set *set,
69 struct nft_rbtree_elem *rbe)
70{
71 nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
72 if (set->flags & NFT_SET_MAP)
73 nft_data_uninit(rbe->data, set->dtype);
74 kfree(rbe);
75}
76
77static int __nft_rbtree_insert(const struct nft_set *set,
78 struct nft_rbtree_elem *new)
79{
80 struct nft_rbtree *priv = nft_set_priv(set);
81 struct nft_rbtree_elem *rbe;
82 struct rb_node *parent, **p;
83 int d;
84
85 parent = NULL;
86 p = &priv->root.rb_node;
87 while (*p != NULL) {
88 parent = *p;
89 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
90 d = nft_data_cmp(&rbe->key, &new->key, set->klen);
91 if (d < 0)
92 p = &parent->rb_left;
93 else if (d > 0)
94 p = &parent->rb_right;
95 else
96 return -EEXIST;
97 }
98 rb_link_node(&new->node, parent, p);
99 rb_insert_color(&new->node, &priv->root);
100 return 0;
101}
102
103static int nft_rbtree_insert(const struct nft_set *set,
104 const struct nft_set_elem *elem)
105{
106 struct nft_rbtree_elem *rbe;
107 unsigned int size;
108 int err;
109
110 size = sizeof(*rbe);
111 if (set->flags & NFT_SET_MAP)
112 size += sizeof(rbe->data[0]);
113
114 rbe = kzalloc(size, GFP_KERNEL);
115 if (rbe == NULL)
116 return -ENOMEM;
117
118 rbe->flags = elem->flags;
119 nft_data_copy(&rbe->key, &elem->key);
120 if (set->flags & NFT_SET_MAP)
121 nft_data_copy(rbe->data, &elem->data);
122
123 err = __nft_rbtree_insert(set, rbe);
124 if (err < 0)
125 kfree(rbe);
126 return err;
127}
128
129static void nft_rbtree_remove(const struct nft_set *set,
130 const struct nft_set_elem *elem)
131{
132 struct nft_rbtree *priv = nft_set_priv(set);
133 struct nft_rbtree_elem *rbe = elem->cookie;
134
135 rb_erase(&rbe->node, &priv->root);
136 kfree(rbe);
137}
138
139static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
140{
141 const struct nft_rbtree *priv = nft_set_priv(set);
142 const struct rb_node *parent = priv->root.rb_node;
143 struct nft_rbtree_elem *rbe;
144 int d;
145
146 while (parent != NULL) {
147 rbe = rb_entry(parent, struct nft_rbtree_elem, node);
148
149 d = nft_data_cmp(&rbe->key, &elem->key, set->klen);
150 if (d < 0)
151 parent = parent->rb_left;
152 else if (d > 0)
153 parent = parent->rb_right;
154 else {
155 elem->cookie = rbe;
156 if (set->flags & NFT_SET_MAP)
157 nft_data_copy(&elem->data, rbe->data);
158 elem->flags = rbe->flags;
159 return 0;
160 }
161 }
162 return -ENOENT;
163}
164
165static void nft_rbtree_walk(const struct nft_ctx *ctx,
166 const struct nft_set *set,
167 struct nft_set_iter *iter)
168{
169 const struct nft_rbtree *priv = nft_set_priv(set);
170 const struct nft_rbtree_elem *rbe;
171 struct nft_set_elem elem;
172 struct rb_node *node;
173
174 for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
175 if (iter->count < iter->skip)
176 goto cont;
177
178 rbe = rb_entry(node, struct nft_rbtree_elem, node);
179 nft_data_copy(&elem.key, &rbe->key);
180 if (set->flags & NFT_SET_MAP)
181 nft_data_copy(&elem.data, rbe->data);
182 elem.flags = rbe->flags;
183
184 iter->err = iter->fn(ctx, set, iter, &elem);
185 if (iter->err < 0)
186 return;
187cont:
188 iter->count++;
189 }
190}
191
192static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[])
193{
194 return sizeof(struct nft_rbtree);
195}
196
197static int nft_rbtree_init(const struct nft_set *set,
198 const struct nlattr * const nla[])
199{
200 struct nft_rbtree *priv = nft_set_priv(set);
201
202 priv->root = RB_ROOT;
203 return 0;
204}
205
206static void nft_rbtree_destroy(const struct nft_set *set)
207{
208 struct nft_rbtree *priv = nft_set_priv(set);
209 struct nft_rbtree_elem *rbe;
210 struct rb_node *node;
211
212 while ((node = priv->root.rb_node) != NULL) {
213 rb_erase(node, &priv->root);
214 rbe = rb_entry(node, struct nft_rbtree_elem, node);
215 nft_rbtree_elem_destroy(set, rbe);
216 }
217}
218
219static struct nft_set_ops nft_rbtree_ops __read_mostly = {
220 .privsize = nft_rbtree_privsize,
221 .init = nft_rbtree_init,
222 .destroy = nft_rbtree_destroy,
223 .insert = nft_rbtree_insert,
224 .remove = nft_rbtree_remove,
225 .get = nft_rbtree_get,
226 .lookup = nft_rbtree_lookup,
227 .walk = nft_rbtree_walk,
228 .features = NFT_SET_INTERVAL | NFT_SET_MAP,
229 .owner = THIS_MODULE,
230};
231
232static int __init nft_rbtree_module_init(void)
233{
234 return nft_register_set(&nft_rbtree_ops);
235}
236
237static void __exit nft_rbtree_module_exit(void)
238{
239 nft_unregister_set(&nft_rbtree_ops);
240}
241
242module_init(nft_rbtree_module_init);
243module_exit(nft_rbtree_module_exit);
244
245MODULE_LICENSE("GPL");
246MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
247MODULE_ALIAS_NFT_SET();
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 568c7699abf1..3f224d7795f5 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4668,7 +4668,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_forward(unsigned int hooknum,
+static unsigned int selinux_ipv4_forward(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -4678,7 +4678,7 @@ static unsigned int selinux_ipv4_forward(unsigned int hooknum,
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static unsigned int selinux_ipv6_forward(unsigned int hooknum,
+static unsigned int selinux_ipv6_forward(const struct nf_hook_ops *ops,
 					 struct sk_buff *skb,
 					 const struct net_device *in,
 					 const struct net_device *out,
@@ -4710,7 +4710,7 @@ static unsigned int selinux_ip_output(struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_output(unsigned int hooknum,
+static unsigned int selinux_ipv4_output(const struct nf_hook_ops *ops,
 					struct sk_buff *skb,
 					const struct net_device *in,
 					const struct net_device *out,
@@ -4837,7 +4837,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex,
 	return NF_ACCEPT;
 }
 
-static unsigned int selinux_ipv4_postroute(unsigned int hooknum,
+static unsigned int selinux_ipv4_postroute(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
 					   const struct net_device *in,
 					   const struct net_device *out,
@@ -4847,7 +4847,7 @@ static unsigned int selinux_ipv4_postroute(unsigned int hooknum,
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static unsigned int selinux_ipv6_postroute(unsigned int hooknum,
+static unsigned int selinux_ipv6_postroute(const struct nf_hook_ops *ops,
 					   struct sk_buff *skb,
 					   const struct net_device *in,
 					   const struct net_device *out,