path: root/net/core
author     Linus Torvalds <torvalds@linux-foundation.org>   2014-06-12 17:27:40 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2014-06-12 17:27:40 -0400
commit     f9da455b93f6ba076935b4ef4589f61e529ae046 (patch)
tree       3c4e69ce1ba1d6bf65915b97a76ca2172105b278 /net/core
parent     0e04c641b199435f3779454055f6a7de258ecdfc (diff)
parent     e5eca6d41f53db48edd8cf88a3f59d2c30227f8e (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:

 1) Seccomp BPF filters can now be JIT'd, from Alexei Starovoitov.
 2) Multiqueue support in xen-netback and xen-netfront, from Andrew J Benniston.
 3) Allow tweaking of aggregation settings in cdc_ncm driver, from Bjørn Mork.
 4) BPF now has a "random" opcode, from Chema Gonzalez.
 5) Add more BPF documentation and improve test framework, from Daniel Borkmann.
 6) Support TCP fastopen over ipv6, from Daniel Lee.
 7) Add software TSO helper functions and use them to support software TSO in mvneta and mv643xx_eth drivers. From Ezequiel Garcia.
 8) Support software TSO in fec driver too, from Nimrod Andy.
 9) Add Broadcom SYSTEMPORT driver, from Florian Fainelli.
10) Handle broadcasts more gracefully over macvlan when there are large numbers of interfaces configured, from Herbert Xu.
11) Allow more control over fwmark used for non-socket based responses, from Lorenzo Colitti.
12) Do TCP congestion window limiting based upon measurements, from Neal Cardwell.
13) Support busy polling in SCTP, from Neal Horman.
14) Allow RSS key to be configured via ethtool, from Venkata Duvvuru.
15) Bridge promisc mode handling improvements from Vlad Yasevich.
16) Don't use inetpeer entries to implement ID generation any more, it performs poorly, from Eric Dumazet.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1522 commits)
  rtnetlink: fix userspace API breakage for iproute2 < v3.9.0
  tcp: fixing TLP's FIN recovery
  net: fec: Add software TSO support
  net: fec: Add Scatter/gather support
  net: fec: Increase buffer descriptor entry number
  net: fec: Factorize feature setting
  net: fec: Enable IP header hardware checksum
  net: fec: Factorize the .xmit transmit function
  bridge: fix compile error when compiling without IPv6 support
  bridge: fix smatch warning / potential null pointer dereference
  via-rhine: fix full-duplex with autoneg disable
  bnx2x: Enlarge the dorq threshold for VFs
  bnx2x: Check for UNDI in uncommon branch
  bnx2x: Fix 1G-baseT link
  bnx2x: Fix link for KR with swapped polarity lane
  sctp: Fix sk_ack_backlog wrap-around problem
  net/core: Add VF link state control policy
  net/fsl: xgmac_mdio is dependent on OF_MDIO
  net/fsl: Make xgmac_mdio read error message useful
  net_sched: drr: warn when qdisc is not work conserving
  ...
Diffstat (limited to 'net/core')
-rw-r--r--  net/core/Makefile           |    2
-rw-r--r--  net/core/datagram.c         |   14
-rw-r--r--  net/core/dev.c              |   98
-rw-r--r--  net/core/dev_addr_lists.c   |   85
-rw-r--r--  net/core/ethtool.c          |  215
-rw-r--r--  net/core/filter.c           | 1292
-rw-r--r--  net/core/net_namespace.c    |    2
-rw-r--r--  net/core/pktgen.c           |   50
-rw-r--r--  net/core/ptp_classifier.c   |    4
-rw-r--r--  net/core/rtnetlink.c        |   73
-rw-r--r--  net/core/secure_seq.c       |   25
-rw-r--r--  net/core/skbuff.c           |   28
-rw-r--r--  net/core/sock.c             |    4
-rw-r--r--  net/core/tso.c              |   77
14 files changed, 1117 insertions, 852 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 826b925aa453..71093d94ad2b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -9,7 +9,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 	 neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-	 sock_diag.o dev_ioctl.o
+	 sock_diag.o dev_ioctl.o tso.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a16ed7bbe376..6b1c04ca1d50 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -739,11 +739,15 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 	__sum16 sum;
 
 	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
-	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
-			netdev_rx_csum_fault(skb->dev);
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum &&
+	    !skb->csum_complete_sw)
+		netdev_rx_csum_fault(skb->dev);
+
+	/* Save checksum complete for later use */
+	skb->csum = sum;
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb->csum_complete_sw = 1;
+
 	return sum;
 }
 EXPORT_SYMBOL(__skb_checksum_complete_head);
diff --git a/net/core/dev.c b/net/core/dev.c
index 8908a68db449..30eedf677913 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1661,6 +1661,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+			atomic_long_inc(&dev->rx_dropped);
+			kfree_skb(skb);
+			return NET_RX_DROP;
+		}
+	}
+
+	if (unlikely(!is_skb_forwardable(dev, skb))) {
+		atomic_long_inc(&dev->rx_dropped);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	skb_scrub_packet(skb, true);
+	skb->protocol = eth_type_trans(skb, dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__dev_forward_skb);
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1681,24 +1704,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
  */
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-			atomic_long_inc(&dev->rx_dropped);
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-	}
-
-	if (unlikely(!is_skb_forwardable(dev, skb))) {
-		atomic_long_inc(&dev->rx_dropped);
-		kfree_skb(skb);
-		return NET_RX_DROP;
-	}
-
-	skb_scrub_packet(skb, true);
-	skb->protocol = eth_type_trans(skb, dev);
-
-	return netif_rx_internal(skb);
+	return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
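With the validation and scrubbing logic split out into __dev_forward_skb(), a driver or tunnel that wants dev_forward_skb() semantics but its own delivery step can call the new helper and hand the packet off itself. The snippet below is a minimal sketch of that pattern; my_forward() and the choice of netif_receive_skb() are illustrative assumptions, not part of this patch.

/* Sketch: forward an skb to a peer device, but deliver it with
 * netif_receive_skb() instead of netif_rx(). my_forward() is a
 * hypothetical helper, not something added by this series.
 */
static int my_forward(struct net_device *peer, struct sk_buff *skb)
{
	int ret = __dev_forward_skb(peer, skb);	/* validates, scrubs, sets protocol */

	if (likely(!ret))
		ret = netif_receive_skb(skb);	/* deliver in the caller's context */

	return ret;	/* __dev_forward_skb() already freed the skb on error */
}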
@@ -2507,13 +2513,39 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 	return 0;
 }
 
+/* If MPLS offload request, verify we are testing hardware MPLS features
+ * instead of standard features for the netdev.
+ */
+#ifdef CONFIG_NET_MPLS_GSO
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+					   netdev_features_t features,
+					   __be16 type)
+{
+	if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
+		features &= skb->dev->mpls_features;
+
+	return features;
+}
+#else
+static netdev_features_t net_mpls_features(struct sk_buff *skb,
+					   netdev_features_t features,
+					   __be16 type)
+{
+	return features;
+}
+#endif
+
 static netdev_features_t harmonize_features(struct sk_buff *skb,
 					    netdev_features_t features)
 {
 	int tmp;
+	__be16 type;
+
+	type = skb_network_protocol(skb, &tmp);
+	features = net_mpls_features(skb, features, type);
 
 	if (skb->ip_summed != CHECKSUM_NONE &&
-	    !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
+	    !can_checksum_protocol(features, type)) {
 		features &= ~NETIF_F_ALL_CSUM;
 	} else if (illegal_highdma(skb->dev, skb)) {
 		features &= ~NETIF_F_SG;
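net_mpls_features() only narrows the feature set when the packet's network protocol is ETH_P_MPLS_UC or ETH_P_MPLS_MC, so offloads for MPLS-encapsulated traffic are governed by dev->mpls_features rather than the ordinary feature mask. A driver opts in by populating that mask at setup time; the exact flags below are an illustrative guess, not taken from any specific driver in this series.

/* Sketch: a driver advertising software GSO for MPLS-encapsulated packets.
 * The flag choice is an assumption for illustration only.
 */
static void mydrv_setup_features(struct net_device *dev)
{
	dev->features |= NETIF_F_SG | NETIF_F_GSO_SOFTWARE;

	/* Offloads allowed when the skb carries an MPLS ethertype;
	 * net_mpls_features() will AND the active features against this mask.
	 */
	dev->mpls_features = NETIF_F_SG | NETIF_F_GSO_SOFTWARE;
}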
@@ -5689,10 +5721,6 @@ static void rollback_registered_many(struct list_head *head)
 	 */
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 
-	if (!dev->rtnl_link_ops ||
-	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
-		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
-
 	/*
 	 * Flush the unicast and multicast chains
 	 */
@@ -5702,6 +5730,10 @@ static void rollback_registered_many(struct list_head *head)
 	if (dev->netdev_ops->ndo_uninit)
 		dev->netdev_ops->ndo_uninit(dev);
 
+	if (!dev->rtnl_link_ops ||
+	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+
 	/* Notifier chain MUST detach us all upper devices. */
 	WARN_ON(netdev_has_any_upper_dev(dev));
 
@@ -5927,10 +5959,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 
 static void netif_free_tx_queues(struct net_device *dev)
 {
-	if (is_vmalloc_addr(dev->_tx))
-		vfree(dev->_tx);
-	else
-		kfree(dev->_tx);
+	kvfree(dev->_tx);
 }
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
@@ -6404,10 +6433,7 @@ void netdev_freemem(struct net_device *dev)
 {
 	char *addr = (char *)dev - dev->padded;
 
-	if (is_vmalloc_addr(addr))
-		vfree(addr);
-	else
-		kfree(addr);
+	kvfree(addr);
 }
 
 /**
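The two hunks above collapse the open-coded "vfree() if vmalloc'ed, else kfree()" pattern into kvfree(), which accepts either kind of pointer. A sketch of the matching allocation side, assuming the kmalloc-with-vmalloc-fallback pattern used for these arrays; the function name and sizing are illustrative.

/* Sketch: allocate a large array with kzalloc(), fall back to vzalloc(),
 * and release it later with kvfree() regardless of which allocator won.
 */
static struct netdev_queue *alloc_queue_array(unsigned int count)
{
	size_t sz = count * sizeof(struct netdev_queue);
	struct netdev_queue *q;

	q = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!q)
		q = vzalloc(sz);

	return q;	/* caller frees with kvfree(q) */
}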
@@ -6512,11 +6538,6 @@ free_all:
 
 free_pcpu:
 	free_percpu(dev->pcpu_refcnt);
-	netif_free_tx_queues(dev);
-#ifdef CONFIG_SYSFS
-	kfree(dev->_rx);
-#endif
-
 free_dev:
 	netdev_freemem(dev);
 	return NULL;
@@ -6613,6 +6634,9 @@ EXPORT_SYMBOL(unregister_netdevice_queue);
 /**
  * unregister_netdevice_many - unregister many devices
  * @head: list of devices
+ *
+ *  Note: As most callers use a stack allocated list_head,
+ *  we force a list_del() to make sure stack wont be corrupted later.
  */
 void unregister_netdevice_many(struct list_head *head)
 {
@@ -6622,6 +6646,7 @@ void unregister_netdevice_many(struct list_head *head)
 		rollback_registered_many(head);
 		list_for_each_entry(dev, head, unreg_list)
 			net_set_todo(dev);
+		list_del(head);
 	}
 }
 EXPORT_SYMBOL(unregister_netdevice_many);
@@ -7077,7 +7102,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
 		}
 	}
 	unregister_netdevice_many(&dev_kill_list);
-	list_del(&dev_kill_list);
 	rtnl_unlock();
 }
 
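Because unregister_netdevice_many() now performs the final list_del() itself, the devices' unreg_list entries no longer point at the caller's on-stack list head once it returns, and callers such as default_device_exit_batch() drop their explicit list_del(). A typical caller, sketched with an illustrative predicate and function name:

/* Sketch: batch-unregister a set of devices under RTNL. The on-stack
 * kill_list is safe to go out of scope after the call.
 */
static void kill_matching_devs(struct net *net)
{
	struct net_device *dev, *tmp;
	LIST_HEAD(kill_list);

	rtnl_lock();
	for_each_netdev_safe(net, dev, tmp)
		if (dev->rtnl_link_ops)		/* illustrative predicate */
			unregister_netdevice_queue(dev, &kill_list);
	unregister_netdevice_many(&kill_list);
	rtnl_unlock();
}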
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 329d5794e7dc..b6b230600b97 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
225} 225}
226EXPORT_SYMBOL(__hw_addr_unsync); 226EXPORT_SYMBOL(__hw_addr_unsync);
227 227
228/**
229 * __hw_addr_sync_dev - Synchonize device's multicast list
230 * @list: address list to syncronize
231 * @dev: device to sync
232 * @sync: function to call if address should be added
233 * @unsync: function to call if address should be removed
234 *
235 * This funciton is intended to be called from the ndo_set_rx_mode
236 * function of devices that require explicit address add/remove
237 * notifications. The unsync function may be NULL in which case
238 * the addresses requiring removal will simply be removed without
239 * any notification to the device.
240 **/
241int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
242 struct net_device *dev,
243 int (*sync)(struct net_device *, const unsigned char *),
244 int (*unsync)(struct net_device *,
245 const unsigned char *))
246{
247 struct netdev_hw_addr *ha, *tmp;
248 int err;
249
250 /* first go through and flush out any stale entries */
251 list_for_each_entry_safe(ha, tmp, &list->list, list) {
252 if (!ha->sync_cnt || ha->refcount != 1)
253 continue;
254
255 /* if unsync is defined and fails defer unsyncing address */
256 if (unsync && unsync(dev, ha->addr))
257 continue;
258
259 ha->sync_cnt--;
260 __hw_addr_del_entry(list, ha, false, false);
261 }
262
263 /* go through and sync new entries to the list */
264 list_for_each_entry_safe(ha, tmp, &list->list, list) {
265 if (ha->sync_cnt)
266 continue;
267
268 err = sync(dev, ha->addr);
269 if (err)
270 return err;
271
272 ha->sync_cnt++;
273 ha->refcount++;
274 }
275
276 return 0;
277}
278EXPORT_SYMBOL(__hw_addr_sync_dev);
279
280/**
281 * __hw_addr_unsync_dev - Remove synchonized addresses from device
282 * @list: address list to remove syncronized addresses from
283 * @dev: device to sync
284 * @unsync: function to call if address should be removed
285 *
286 * Remove all addresses that were added to the device by __hw_addr_sync_dev().
287 * This function is intended to be called from the ndo_stop or ndo_open
288 * functions on devices that require explicit address add/remove
289 * notifications. If the unsync function pointer is NULL then this function
290 * can be used to just reset the sync_cnt for the addresses in the list.
291 **/
292void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
293 struct net_device *dev,
294 int (*unsync)(struct net_device *,
295 const unsigned char *))
296{
297 struct netdev_hw_addr *ha, *tmp;
298
299 list_for_each_entry_safe(ha, tmp, &list->list, list) {
300 if (!ha->sync_cnt)
301 continue;
302
303 /* if unsync is defined and fails defer unsyncing address */
304 if (unsync && unsync(dev, ha->addr))
305 continue;
306
307 ha->sync_cnt--;
308 __hw_addr_del_entry(list, ha, false, false);
309 }
310}
311EXPORT_SYMBOL(__hw_addr_unsync_dev);
312
228static void __hw_addr_flush(struct netdev_hw_addr_list *list) 313static void __hw_addr_flush(struct netdev_hw_addr_list *list)
229{ 314{
230 struct netdev_hw_addr *ha, *tmp; 315 struct netdev_hw_addr *ha, *tmp;
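As the kerneldoc above says, the new helpers are meant to be driven from ndo_set_rx_mode (sync) and ndo_stop/ndo_open (unsync), with the driver supplying per-address add/remove callbacks. A minimal driver-side sketch; the mydrv_* names are invented for illustration, and the hardware programming is elided.

/* Sketch of a driver using the new helpers on its multicast list. */
static int mydrv_mc_add(struct net_device *dev, const unsigned char *addr)
{
	/* program addr into a hardware multicast filter slot */
	return 0;
}

static int mydrv_mc_del(struct net_device *dev, const unsigned char *addr)
{
	/* clear the matching hardware filter slot */
	return 0;
}

static void mydrv_set_rx_mode(struct net_device *dev)
{
	/* add/remove only the entries that changed since the last call */
	__hw_addr_sync_dev(&dev->mc, dev, mydrv_mc_add, mydrv_mc_del);
}

static int mydrv_stop(struct net_device *dev)
{
	/* drop every address previously synced to the hardware */
	__hw_addr_unsync_dev(&dev->mc, dev, mydrv_mc_del);
	return 0;
}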
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 640ba0e5831c..17cb912793fa 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -557,6 +557,23 @@ err_out:
557 return ret; 557 return ret;
558} 558}
559 559
560static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
561 struct ethtool_rxnfc *rx_rings,
562 u32 size)
563{
564 int i;
565
566 if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
567 return -EFAULT;
568
569 /* Validate ring indices */
570 for (i = 0; i < size; i++)
571 if (indir[i] >= rx_rings->data)
572 return -EINVAL;
573
574 return 0;
575}
576
560static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, 577static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
561 void __user *useraddr) 578 void __user *useraddr)
562{ 579{
@@ -565,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
565 int ret; 582 int ret;
566 583
567 if (!dev->ethtool_ops->get_rxfh_indir_size || 584 if (!dev->ethtool_ops->get_rxfh_indir_size ||
568 !dev->ethtool_ops->get_rxfh_indir) 585 !dev->ethtool_ops->get_rxfh)
569 return -EOPNOTSUPP; 586 return -EOPNOTSUPP;
570 dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); 587 dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
571 if (dev_size == 0) 588 if (dev_size == 0)
@@ -591,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
591 if (!indir) 608 if (!indir)
592 return -ENOMEM; 609 return -ENOMEM;
593 610
594 ret = dev->ethtool_ops->get_rxfh_indir(dev, indir); 611 ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL);
595 if (ret) 612 if (ret)
596 goto out; 613 goto out;
597 614
@@ -613,8 +630,9 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
613 u32 *indir; 630 u32 *indir;
614 const struct ethtool_ops *ops = dev->ethtool_ops; 631 const struct ethtool_ops *ops = dev->ethtool_ops;
615 int ret; 632 int ret;
633 u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
616 634
617 if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir || 635 if (!ops->get_rxfh_indir_size || !ops->set_rxfh ||
618 !ops->get_rxnfc) 636 !ops->get_rxnfc)
619 return -EOPNOTSUPP; 637 return -EOPNOTSUPP;
620 638
@@ -643,28 +661,184 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
643 for (i = 0; i < dev_size; i++) 661 for (i = 0; i < dev_size; i++)
644 indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data); 662 indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
645 } else { 663 } else {
646 if (copy_from_user(indir, 664 ret = ethtool_copy_validate_indir(indir,
647 useraddr + 665 useraddr + ringidx_offset,
648 offsetof(struct ethtool_rxfh_indir, 666 &rx_rings,
649 ring_index[0]), 667 dev_size);
650 dev_size * sizeof(indir[0]))) { 668 if (ret)
669 goto out;
670 }
671
672 ret = ops->set_rxfh(dev, indir, NULL);
673
674out:
675 kfree(indir);
676 return ret;
677}
678
679static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
680 void __user *useraddr)
681{
682 int ret;
683 const struct ethtool_ops *ops = dev->ethtool_ops;
684 u32 user_indir_size, user_key_size;
685 u32 dev_indir_size = 0, dev_key_size = 0;
686 struct ethtool_rxfh rxfh;
687 u32 total_size;
688 u32 indir_bytes;
689 u32 *indir = NULL;
690 u8 *hkey = NULL;
691 u8 *rss_config;
692
693 if (!(dev->ethtool_ops->get_rxfh_indir_size ||
694 dev->ethtool_ops->get_rxfh_key_size) ||
695 !dev->ethtool_ops->get_rxfh)
696 return -EOPNOTSUPP;
697
698 if (ops->get_rxfh_indir_size)
699 dev_indir_size = ops->get_rxfh_indir_size(dev);
700 if (ops->get_rxfh_key_size)
701 dev_key_size = ops->get_rxfh_key_size(dev);
702
703 if ((dev_key_size + dev_indir_size) == 0)
704 return -EOPNOTSUPP;
705
706 if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
707 return -EFAULT;
708 user_indir_size = rxfh.indir_size;
709 user_key_size = rxfh.key_size;
710
711 /* Check that reserved fields are 0 for now */
712 if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
713 return -EINVAL;
714
715 rxfh.indir_size = dev_indir_size;
716 rxfh.key_size = dev_key_size;
717 if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
718 return -EFAULT;
719
720 /* If the user buffer size is 0, this is just a query for the
721 * device table size and key size. Otherwise, if the User size is
722 * not equal to device table size or key size it's an error.
723 */
724 if (!user_indir_size && !user_key_size)
725 return 0;
726
727 if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
728 (user_key_size && (user_key_size != dev_key_size)))
729 return -EINVAL;
730
731 indir_bytes = user_indir_size * sizeof(indir[0]);
732 total_size = indir_bytes + user_key_size;
733 rss_config = kzalloc(total_size, GFP_USER);
734 if (!rss_config)
735 return -ENOMEM;
736
737 if (user_indir_size)
738 indir = (u32 *)rss_config;
739
740 if (user_key_size)
741 hkey = rss_config + indir_bytes;
742
743 ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey);
744 if (!ret) {
745 if (copy_to_user(useraddr +
746 offsetof(struct ethtool_rxfh, rss_config[0]),
747 rss_config, total_size))
651 ret = -EFAULT; 748 ret = -EFAULT;
749 }
750
751 kfree(rss_config);
752
753 return ret;
754}
755
756static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
757 void __user *useraddr)
758{
759 int ret;
760 const struct ethtool_ops *ops = dev->ethtool_ops;
761 struct ethtool_rxnfc rx_rings;
762 struct ethtool_rxfh rxfh;
763 u32 dev_indir_size = 0, dev_key_size = 0, i;
764 u32 *indir = NULL, indir_bytes = 0;
765 u8 *hkey = NULL;
766 u8 *rss_config;
767 u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
768
769 if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
770 !ops->get_rxnfc || !ops->set_rxfh)
771 return -EOPNOTSUPP;
772
773 if (ops->get_rxfh_indir_size)
774 dev_indir_size = ops->get_rxfh_indir_size(dev);
775 if (ops->get_rxfh_key_size)
776 dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
777 if ((dev_key_size + dev_indir_size) == 0)
778 return -EOPNOTSUPP;
779
780 if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
781 return -EFAULT;
782
783 /* Check that reserved fields are 0 for now */
784 if (rxfh.rss_context || rxfh.rsvd[0] || rxfh.rsvd[1])
785 return -EINVAL;
786
787 /* If either indir or hash key is valid, proceed further.
788 * It is not valid to request that both be unchanged.
789 */
790 if ((rxfh.indir_size &&
791 rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
792 rxfh.indir_size != dev_indir_size) ||
793 (rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
794 (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
795 rxfh.key_size == 0))
796 return -EINVAL;
797
798 if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
799 indir_bytes = dev_indir_size * sizeof(indir[0]);
800
801 rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
802 if (!rss_config)
803 return -ENOMEM;
804
805 rx_rings.cmd = ETHTOOL_GRXRINGS;
806 ret = ops->get_rxnfc(dev, &rx_rings, NULL);
807 if (ret)
808 goto out;
809
810 /* rxfh.indir_size == 0 means reset the indir table to default.
811 * rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE means leave it unchanged.
812 */
813 if (rxfh.indir_size &&
814 rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
815 indir = (u32 *)rss_config;
816 ret = ethtool_copy_validate_indir(indir,
817 useraddr + rss_cfg_offset,
818 &rx_rings,
819 rxfh.indir_size);
820 if (ret)
652 goto out; 821 goto out;
653 } 822 } else if (rxfh.indir_size == 0) {
823 indir = (u32 *)rss_config;
824 for (i = 0; i < dev_indir_size; i++)
825 indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
826 }
654 827
655 /* Validate ring indices */ 828 if (rxfh.key_size) {
656 for (i = 0; i < dev_size; i++) { 829 hkey = rss_config + indir_bytes;
657 if (indir[i] >= rx_rings.data) { 830 if (copy_from_user(hkey,
658 ret = -EINVAL; 831 useraddr + rss_cfg_offset + indir_bytes,
659 goto out; 832 rxfh.key_size)) {
660 } 833 ret = -EFAULT;
834 goto out;
661 } 835 }
662 } 836 }
663 837
664 ret = ops->set_rxfh_indir(dev, indir); 838 ret = ops->set_rxfh(dev, indir, hkey);
665 839
666out: 840out:
667 kfree(indir); 841 kfree(rss_config);
668 return ret; 842 return ret;
669} 843}
670 844
@@ -1491,6 +1665,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1491 case ETHTOOL_GRXCLSRULE: 1665 case ETHTOOL_GRXCLSRULE:
1492 case ETHTOOL_GRXCLSRLALL: 1666 case ETHTOOL_GRXCLSRLALL:
1493 case ETHTOOL_GRXFHINDIR: 1667 case ETHTOOL_GRXFHINDIR:
1668 case ETHTOOL_GRSSH:
1494 case ETHTOOL_GFEATURES: 1669 case ETHTOOL_GFEATURES:
1495 case ETHTOOL_GCHANNELS: 1670 case ETHTOOL_GCHANNELS:
1496 case ETHTOOL_GET_TS_INFO: 1671 case ETHTOOL_GET_TS_INFO:
@@ -1628,6 +1803,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1628 case ETHTOOL_SRXFHINDIR: 1803 case ETHTOOL_SRXFHINDIR:
1629 rc = ethtool_set_rxfh_indir(dev, useraddr); 1804 rc = ethtool_set_rxfh_indir(dev, useraddr);
1630 break; 1805 break;
1806 case ETHTOOL_GRSSH:
1807 rc = ethtool_get_rxfh(dev, useraddr);
1808 break;
1809 case ETHTOOL_SRSSH:
1810 rc = ethtool_set_rxfh(dev, useraddr);
1811 break;
1631 case ETHTOOL_GFEATURES: 1812 case ETHTOOL_GFEATURES:
1632 rc = ethtool_get_features(dev, useraddr); 1813 rc = ethtool_get_features(dev, useraddr);
1633 break; 1814 break;
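The new ETHTOOL_GRSSH/ETHTOOL_SRSSH commands let a single ioctl carry both the RSS indirection table and the hash key; a driver implements them by reporting the two sizes and providing get_rxfh/set_rxfh, which receive NULL for whichever part the core is not touching (as the get_rxfh(dev, indir, NULL) call above shows). A driver-side sketch follows; the sizes, struct mydrv_priv fields and mydrv_program_rss() are assumptions for illustration, and the usual <linux/ethtool.h>/<linux/netdevice.h> includes are assumed.

/* Sketch of the driver half of ETHTOOL_GRSSH/SRSSH. */
#define MYDRV_RSS_INDIR_SIZE	128
#define MYDRV_RSS_KEY_SIZE	40

static u32 mydrv_get_rxfh_indir_size(struct net_device *dev)
{
	return MYDRV_RSS_INDIR_SIZE;
}

static u32 mydrv_get_rxfh_key_size(struct net_device *dev)
{
	return MYDRV_RSS_KEY_SIZE;
}

static int mydrv_get_rxfh(struct net_device *dev, u32 *indir, u8 *key)
{
	struct mydrv_priv *priv = netdev_priv(dev);

	if (indir)	/* NULL when only the key was requested */
		memcpy(indir, priv->rss_indir, sizeof(priv->rss_indir));
	if (key)
		memcpy(key, priv->rss_key, sizeof(priv->rss_key));
	return 0;
}

static int mydrv_set_rxfh(struct net_device *dev, const u32 *indir,
			  const u8 *key)
{
	struct mydrv_priv *priv = netdev_priv(dev);

	if (indir)
		memcpy(priv->rss_indir, indir, sizeof(priv->rss_indir));
	if (key)
		memcpy(priv->rss_key, key, sizeof(priv->rss_key));

	return mydrv_program_rss(priv);	/* push table and key to hardware */
}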
diff --git a/net/core/filter.c b/net/core/filter.c
index 4aec7b93f1a9..735fad897496 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -45,6 +45,27 @@
45#include <linux/seccomp.h> 45#include <linux/seccomp.h>
46#include <linux/if_vlan.h> 46#include <linux/if_vlan.h>
47 47
48/* Registers */
49#define BPF_R0 regs[BPF_REG_0]
50#define BPF_R1 regs[BPF_REG_1]
51#define BPF_R2 regs[BPF_REG_2]
52#define BPF_R3 regs[BPF_REG_3]
53#define BPF_R4 regs[BPF_REG_4]
54#define BPF_R5 regs[BPF_REG_5]
55#define BPF_R6 regs[BPF_REG_6]
56#define BPF_R7 regs[BPF_REG_7]
57#define BPF_R8 regs[BPF_REG_8]
58#define BPF_R9 regs[BPF_REG_9]
59#define BPF_R10 regs[BPF_REG_10]
60
61/* Named registers */
62#define DST regs[insn->dst_reg]
63#define SRC regs[insn->src_reg]
64#define FP regs[BPF_REG_FP]
65#define ARG1 regs[BPF_REG_ARG1]
66#define CTX regs[BPF_REG_CTX]
67#define IMM insn->imm
68
48/* No hurry in this branch 69/* No hurry in this branch
49 * 70 *
50 * Exported for the bpf jit load helper. 71 * Exported for the bpf jit load helper.
@@ -57,9 +78,9 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
57 ptr = skb_network_header(skb) + k - SKF_NET_OFF; 78 ptr = skb_network_header(skb) + k - SKF_NET_OFF;
58 else if (k >= SKF_LL_OFF) 79 else if (k >= SKF_LL_OFF)
59 ptr = skb_mac_header(skb) + k - SKF_LL_OFF; 80 ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
60
61 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) 81 if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
62 return ptr; 82 return ptr;
83
63 return NULL; 84 return NULL;
64} 85}
65 86
@@ -68,6 +89,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
68{ 89{
69 if (k >= 0) 90 if (k >= 0)
70 return skb_header_pointer(skb, k, size, buffer); 91 return skb_header_pointer(skb, k, size, buffer);
92
71 return bpf_internal_load_pointer_neg_helper(skb, k, size); 93 return bpf_internal_load_pointer_neg_helper(skb, k, size);
72} 94}
73 95
@@ -122,13 +144,6 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
122 return 0; 144 return 0;
123} 145}
124 146
125/* Register mappings for user programs. */
126#define A_REG 0
127#define X_REG 7
128#define TMP_REG 8
129#define ARG2_REG 2
130#define ARG3_REG 3
131
132/** 147/**
133 * __sk_run_filter - run a filter on a given context 148 * __sk_run_filter - run a filter on a given context
134 * @ctx: buffer to run the filter on 149 * @ctx: buffer to run the filter on
@@ -138,447 +153,442 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
138 * keep, 0 for none. @ctx is the data we are operating on, @insn is the 153 * keep, 0 for none. @ctx is the data we are operating on, @insn is the
139 * array of filter instructions. 154 * array of filter instructions.
140 */ 155 */
141unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn) 156static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
142{ 157{
143 u64 stack[MAX_BPF_STACK / sizeof(u64)]; 158 u64 stack[MAX_BPF_STACK / sizeof(u64)];
144 u64 regs[MAX_BPF_REG], tmp; 159 u64 regs[MAX_BPF_REG], tmp;
145 void *ptr;
146 int off;
147
148#define K insn->imm
149#define A regs[insn->a_reg]
150#define X regs[insn->x_reg]
151#define R0 regs[0]
152
153#define CONT ({insn++; goto select_insn; })
154#define CONT_JMP ({insn++; goto select_insn; })
155
156 static const void *jumptable[256] = { 160 static const void *jumptable[256] = {
157 [0 ... 255] = &&default_label, 161 [0 ... 255] = &&default_label,
158 /* Now overwrite non-defaults ... */ 162 /* Now overwrite non-defaults ... */
159#define DL(A, B, C) [A|B|C] = &&A##_##B##_##C 163 /* 32 bit ALU operations */
160 DL(BPF_ALU, BPF_ADD, BPF_X), 164 [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
161 DL(BPF_ALU, BPF_ADD, BPF_K), 165 [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
162 DL(BPF_ALU, BPF_SUB, BPF_X), 166 [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
163 DL(BPF_ALU, BPF_SUB, BPF_K), 167 [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
164 DL(BPF_ALU, BPF_AND, BPF_X), 168 [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
165 DL(BPF_ALU, BPF_AND, BPF_K), 169 [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
166 DL(BPF_ALU, BPF_OR, BPF_X), 170 [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
167 DL(BPF_ALU, BPF_OR, BPF_K), 171 [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
168 DL(BPF_ALU, BPF_LSH, BPF_X), 172 [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
169 DL(BPF_ALU, BPF_LSH, BPF_K), 173 [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
170 DL(BPF_ALU, BPF_RSH, BPF_X), 174 [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
171 DL(BPF_ALU, BPF_RSH, BPF_K), 175 [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
172 DL(BPF_ALU, BPF_XOR, BPF_X), 176 [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
173 DL(BPF_ALU, BPF_XOR, BPF_K), 177 [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
174 DL(BPF_ALU, BPF_MUL, BPF_X), 178 [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
175 DL(BPF_ALU, BPF_MUL, BPF_K), 179 [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
176 DL(BPF_ALU, BPF_MOV, BPF_X), 180 [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
177 DL(BPF_ALU, BPF_MOV, BPF_K), 181 [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
178 DL(BPF_ALU, BPF_DIV, BPF_X), 182 [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
179 DL(BPF_ALU, BPF_DIV, BPF_K), 183 [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
180 DL(BPF_ALU, BPF_MOD, BPF_X), 184 [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
181 DL(BPF_ALU, BPF_MOD, BPF_K), 185 [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
182 DL(BPF_ALU, BPF_NEG, 0), 186 [BPF_ALU | BPF_NEG] = &&ALU_NEG,
183 DL(BPF_ALU, BPF_END, BPF_TO_BE), 187 [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
184 DL(BPF_ALU, BPF_END, BPF_TO_LE), 188 [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
185 DL(BPF_ALU64, BPF_ADD, BPF_X), 189 /* 64 bit ALU operations */
186 DL(BPF_ALU64, BPF_ADD, BPF_K), 190 [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
187 DL(BPF_ALU64, BPF_SUB, BPF_X), 191 [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
188 DL(BPF_ALU64, BPF_SUB, BPF_K), 192 [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
189 DL(BPF_ALU64, BPF_AND, BPF_X), 193 [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
190 DL(BPF_ALU64, BPF_AND, BPF_K), 194 [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
191 DL(BPF_ALU64, BPF_OR, BPF_X), 195 [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
192 DL(BPF_ALU64, BPF_OR, BPF_K), 196 [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
193 DL(BPF_ALU64, BPF_LSH, BPF_X), 197 [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
194 DL(BPF_ALU64, BPF_LSH, BPF_K), 198 [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
195 DL(BPF_ALU64, BPF_RSH, BPF_X), 199 [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
196 DL(BPF_ALU64, BPF_RSH, BPF_K), 200 [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
197 DL(BPF_ALU64, BPF_XOR, BPF_X), 201 [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
198 DL(BPF_ALU64, BPF_XOR, BPF_K), 202 [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
199 DL(BPF_ALU64, BPF_MUL, BPF_X), 203 [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
200 DL(BPF_ALU64, BPF_MUL, BPF_K), 204 [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
201 DL(BPF_ALU64, BPF_MOV, BPF_X), 205 [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
202 DL(BPF_ALU64, BPF_MOV, BPF_K), 206 [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
203 DL(BPF_ALU64, BPF_ARSH, BPF_X), 207 [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
204 DL(BPF_ALU64, BPF_ARSH, BPF_K), 208 [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
205 DL(BPF_ALU64, BPF_DIV, BPF_X), 209 [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
206 DL(BPF_ALU64, BPF_DIV, BPF_K), 210 [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
207 DL(BPF_ALU64, BPF_MOD, BPF_X), 211 [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
208 DL(BPF_ALU64, BPF_MOD, BPF_K), 212 [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
209 DL(BPF_ALU64, BPF_NEG, 0), 213 [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
210 DL(BPF_JMP, BPF_CALL, 0), 214 [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
211 DL(BPF_JMP, BPF_JA, 0), 215 /* Call instruction */
212 DL(BPF_JMP, BPF_JEQ, BPF_X), 216 [BPF_JMP | BPF_CALL] = &&JMP_CALL,
213 DL(BPF_JMP, BPF_JEQ, BPF_K), 217 /* Jumps */
214 DL(BPF_JMP, BPF_JNE, BPF_X), 218 [BPF_JMP | BPF_JA] = &&JMP_JA,
215 DL(BPF_JMP, BPF_JNE, BPF_K), 219 [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
216 DL(BPF_JMP, BPF_JGT, BPF_X), 220 [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
217 DL(BPF_JMP, BPF_JGT, BPF_K), 221 [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
218 DL(BPF_JMP, BPF_JGE, BPF_X), 222 [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
219 DL(BPF_JMP, BPF_JGE, BPF_K), 223 [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
220 DL(BPF_JMP, BPF_JSGT, BPF_X), 224 [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
221 DL(BPF_JMP, BPF_JSGT, BPF_K), 225 [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
222 DL(BPF_JMP, BPF_JSGE, BPF_X), 226 [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
223 DL(BPF_JMP, BPF_JSGE, BPF_K), 227 [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
224 DL(BPF_JMP, BPF_JSET, BPF_X), 228 [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
225 DL(BPF_JMP, BPF_JSET, BPF_K), 229 [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
226 DL(BPF_JMP, BPF_EXIT, 0), 230 [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
227 DL(BPF_STX, BPF_MEM, BPF_B), 231 [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
228 DL(BPF_STX, BPF_MEM, BPF_H), 232 [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
229 DL(BPF_STX, BPF_MEM, BPF_W), 233 /* Program return */
230 DL(BPF_STX, BPF_MEM, BPF_DW), 234 [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
231 DL(BPF_STX, BPF_XADD, BPF_W), 235 /* Store instructions */
232 DL(BPF_STX, BPF_XADD, BPF_DW), 236 [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
233 DL(BPF_ST, BPF_MEM, BPF_B), 237 [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
234 DL(BPF_ST, BPF_MEM, BPF_H), 238 [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
235 DL(BPF_ST, BPF_MEM, BPF_W), 239 [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
236 DL(BPF_ST, BPF_MEM, BPF_DW), 240 [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
237 DL(BPF_LDX, BPF_MEM, BPF_B), 241 [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
238 DL(BPF_LDX, BPF_MEM, BPF_H), 242 [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
239 DL(BPF_LDX, BPF_MEM, BPF_W), 243 [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
240 DL(BPF_LDX, BPF_MEM, BPF_DW), 244 [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
241 DL(BPF_LD, BPF_ABS, BPF_W), 245 [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
242 DL(BPF_LD, BPF_ABS, BPF_H), 246 /* Load instructions */
243 DL(BPF_LD, BPF_ABS, BPF_B), 247 [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
244 DL(BPF_LD, BPF_IND, BPF_W), 248 [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
245 DL(BPF_LD, BPF_IND, BPF_H), 249 [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
246 DL(BPF_LD, BPF_IND, BPF_B), 250 [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
247#undef DL 251 [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
252 [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
253 [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
254 [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
255 [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
256 [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
248 }; 257 };
258 void *ptr;
259 int off;
260
261#define CONT ({ insn++; goto select_insn; })
262#define CONT_JMP ({ insn++; goto select_insn; })
249 263
250 regs[FP_REG] = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; 264 FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
251 regs[ARG1_REG] = (u64) (unsigned long) ctx; 265 ARG1 = (u64) (unsigned long) ctx;
252 regs[A_REG] = 0; 266
253 regs[X_REG] = 0; 267 /* Registers used in classic BPF programs need to be reset first. */
268 regs[BPF_REG_A] = 0;
269 regs[BPF_REG_X] = 0;
254 270
255select_insn: 271select_insn:
256 goto *jumptable[insn->code]; 272 goto *jumptable[insn->code];
257 273
258 /* ALU */ 274 /* ALU */
259#define ALU(OPCODE, OP) \ 275#define ALU(OPCODE, OP) \
260 BPF_ALU64_##OPCODE##_BPF_X: \ 276 ALU64_##OPCODE##_X: \
261 A = A OP X; \ 277 DST = DST OP SRC; \
262 CONT; \ 278 CONT; \
263 BPF_ALU_##OPCODE##_BPF_X: \ 279 ALU_##OPCODE##_X: \
264 A = (u32) A OP (u32) X; \ 280 DST = (u32) DST OP (u32) SRC; \
265 CONT; \ 281 CONT; \
266 BPF_ALU64_##OPCODE##_BPF_K: \ 282 ALU64_##OPCODE##_K: \
267 A = A OP K; \ 283 DST = DST OP IMM; \
268 CONT; \ 284 CONT; \
269 BPF_ALU_##OPCODE##_BPF_K: \ 285 ALU_##OPCODE##_K: \
270 A = (u32) A OP (u32) K; \ 286 DST = (u32) DST OP (u32) IMM; \
271 CONT; 287 CONT;
272 288
273 ALU(BPF_ADD, +) 289 ALU(ADD, +)
274 ALU(BPF_SUB, -) 290 ALU(SUB, -)
275 ALU(BPF_AND, &) 291 ALU(AND, &)
276 ALU(BPF_OR, |) 292 ALU(OR, |)
277 ALU(BPF_LSH, <<) 293 ALU(LSH, <<)
278 ALU(BPF_RSH, >>) 294 ALU(RSH, >>)
279 ALU(BPF_XOR, ^) 295 ALU(XOR, ^)
280 ALU(BPF_MUL, *) 296 ALU(MUL, *)
281#undef ALU 297#undef ALU
282 BPF_ALU_BPF_NEG_0: 298 ALU_NEG:
283 A = (u32) -A; 299 DST = (u32) -DST;
284 CONT; 300 CONT;
285 BPF_ALU64_BPF_NEG_0: 301 ALU64_NEG:
286 A = -A; 302 DST = -DST;
287 CONT; 303 CONT;
288 BPF_ALU_BPF_MOV_BPF_X: 304 ALU_MOV_X:
289 A = (u32) X; 305 DST = (u32) SRC;
290 CONT; 306 CONT;
291 BPF_ALU_BPF_MOV_BPF_K: 307 ALU_MOV_K:
292 A = (u32) K; 308 DST = (u32) IMM;
293 CONT; 309 CONT;
294 BPF_ALU64_BPF_MOV_BPF_X: 310 ALU64_MOV_X:
295 A = X; 311 DST = SRC;
296 CONT; 312 CONT;
297 BPF_ALU64_BPF_MOV_BPF_K: 313 ALU64_MOV_K:
298 A = K; 314 DST = IMM;
299 CONT; 315 CONT;
300 BPF_ALU64_BPF_ARSH_BPF_X: 316 ALU64_ARSH_X:
301 (*(s64 *) &A) >>= X; 317 (*(s64 *) &DST) >>= SRC;
302 CONT; 318 CONT;
303 BPF_ALU64_BPF_ARSH_BPF_K: 319 ALU64_ARSH_K:
304 (*(s64 *) &A) >>= K; 320 (*(s64 *) &DST) >>= IMM;
305 CONT; 321 CONT;
306 BPF_ALU64_BPF_MOD_BPF_X: 322 ALU64_MOD_X:
307 if (unlikely(X == 0)) 323 if (unlikely(SRC == 0))
308 return 0; 324 return 0;
309 tmp = A; 325 tmp = DST;
310 A = do_div(tmp, X); 326 DST = do_div(tmp, SRC);
311 CONT; 327 CONT;
312 BPF_ALU_BPF_MOD_BPF_X: 328 ALU_MOD_X:
313 if (unlikely(X == 0)) 329 if (unlikely(SRC == 0))
314 return 0; 330 return 0;
315 tmp = (u32) A; 331 tmp = (u32) DST;
316 A = do_div(tmp, (u32) X); 332 DST = do_div(tmp, (u32) SRC);
317 CONT; 333 CONT;
318 BPF_ALU64_BPF_MOD_BPF_K: 334 ALU64_MOD_K:
319 tmp = A; 335 tmp = DST;
320 A = do_div(tmp, K); 336 DST = do_div(tmp, IMM);
321 CONT; 337 CONT;
322 BPF_ALU_BPF_MOD_BPF_K: 338 ALU_MOD_K:
323 tmp = (u32) A; 339 tmp = (u32) DST;
324 A = do_div(tmp, (u32) K); 340 DST = do_div(tmp, (u32) IMM);
325 CONT; 341 CONT;
326 BPF_ALU64_BPF_DIV_BPF_X: 342 ALU64_DIV_X:
327 if (unlikely(X == 0)) 343 if (unlikely(SRC == 0))
328 return 0; 344 return 0;
329 do_div(A, X); 345 do_div(DST, SRC);
330 CONT; 346 CONT;
331 BPF_ALU_BPF_DIV_BPF_X: 347 ALU_DIV_X:
332 if (unlikely(X == 0)) 348 if (unlikely(SRC == 0))
333 return 0; 349 return 0;
334 tmp = (u32) A; 350 tmp = (u32) DST;
335 do_div(tmp, (u32) X); 351 do_div(tmp, (u32) SRC);
336 A = (u32) tmp; 352 DST = (u32) tmp;
337 CONT; 353 CONT;
338 BPF_ALU64_BPF_DIV_BPF_K: 354 ALU64_DIV_K:
339 do_div(A, K); 355 do_div(DST, IMM);
340 CONT; 356 CONT;
341 BPF_ALU_BPF_DIV_BPF_K: 357 ALU_DIV_K:
342 tmp = (u32) A; 358 tmp = (u32) DST;
343 do_div(tmp, (u32) K); 359 do_div(tmp, (u32) IMM);
344 A = (u32) tmp; 360 DST = (u32) tmp;
345 CONT; 361 CONT;
346 BPF_ALU_BPF_END_BPF_TO_BE: 362 ALU_END_TO_BE:
347 switch (K) { 363 switch (IMM) {
348 case 16: 364 case 16:
349 A = (__force u16) cpu_to_be16(A); 365 DST = (__force u16) cpu_to_be16(DST);
350 break; 366 break;
351 case 32: 367 case 32:
352 A = (__force u32) cpu_to_be32(A); 368 DST = (__force u32) cpu_to_be32(DST);
353 break; 369 break;
354 case 64: 370 case 64:
355 A = (__force u64) cpu_to_be64(A); 371 DST = (__force u64) cpu_to_be64(DST);
356 break; 372 break;
357 } 373 }
358 CONT; 374 CONT;
359 BPF_ALU_BPF_END_BPF_TO_LE: 375 ALU_END_TO_LE:
360 switch (K) { 376 switch (IMM) {
361 case 16: 377 case 16:
362 A = (__force u16) cpu_to_le16(A); 378 DST = (__force u16) cpu_to_le16(DST);
363 break; 379 break;
364 case 32: 380 case 32:
365 A = (__force u32) cpu_to_le32(A); 381 DST = (__force u32) cpu_to_le32(DST);
366 break; 382 break;
367 case 64: 383 case 64:
368 A = (__force u64) cpu_to_le64(A); 384 DST = (__force u64) cpu_to_le64(DST);
369 break; 385 break;
370 } 386 }
371 CONT; 387 CONT;
372 388
373 /* CALL */ 389 /* CALL */
374 BPF_JMP_BPF_CALL_0: 390 JMP_CALL:
375 /* Function call scratches R1-R5 registers, preserves R6-R9, 391 /* Function call scratches BPF_R1-BPF_R5 registers,
376 * and stores return value into R0. 392 * preserves BPF_R6-BPF_R9, and stores return value
393 * into BPF_R0.
377 */ 394 */
378 R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3], 395 BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
379 regs[4], regs[5]); 396 BPF_R4, BPF_R5);
380 CONT; 397 CONT;
381 398
382 /* JMP */ 399 /* JMP */
383 BPF_JMP_BPF_JA_0: 400 JMP_JA:
384 insn += insn->off; 401 insn += insn->off;
385 CONT; 402 CONT;
386 BPF_JMP_BPF_JEQ_BPF_X: 403 JMP_JEQ_X:
387 if (A == X) { 404 if (DST == SRC) {
388 insn += insn->off; 405 insn += insn->off;
389 CONT_JMP; 406 CONT_JMP;
390 } 407 }
391 CONT; 408 CONT;
392 BPF_JMP_BPF_JEQ_BPF_K: 409 JMP_JEQ_K:
393 if (A == K) { 410 if (DST == IMM) {
394 insn += insn->off; 411 insn += insn->off;
395 CONT_JMP; 412 CONT_JMP;
396 } 413 }
397 CONT; 414 CONT;
398 BPF_JMP_BPF_JNE_BPF_X: 415 JMP_JNE_X:
399 if (A != X) { 416 if (DST != SRC) {
400 insn += insn->off; 417 insn += insn->off;
401 CONT_JMP; 418 CONT_JMP;
402 } 419 }
403 CONT; 420 CONT;
404 BPF_JMP_BPF_JNE_BPF_K: 421 JMP_JNE_K:
405 if (A != K) { 422 if (DST != IMM) {
406 insn += insn->off; 423 insn += insn->off;
407 CONT_JMP; 424 CONT_JMP;
408 } 425 }
409 CONT; 426 CONT;
410 BPF_JMP_BPF_JGT_BPF_X: 427 JMP_JGT_X:
411 if (A > X) { 428 if (DST > SRC) {
412 insn += insn->off; 429 insn += insn->off;
413 CONT_JMP; 430 CONT_JMP;
414 } 431 }
415 CONT; 432 CONT;
416 BPF_JMP_BPF_JGT_BPF_K: 433 JMP_JGT_K:
417 if (A > K) { 434 if (DST > IMM) {
418 insn += insn->off; 435 insn += insn->off;
419 CONT_JMP; 436 CONT_JMP;
420 } 437 }
421 CONT; 438 CONT;
422 BPF_JMP_BPF_JGE_BPF_X: 439 JMP_JGE_X:
423 if (A >= X) { 440 if (DST >= SRC) {
424 insn += insn->off; 441 insn += insn->off;
425 CONT_JMP; 442 CONT_JMP;
426 } 443 }
427 CONT; 444 CONT;
428 BPF_JMP_BPF_JGE_BPF_K: 445 JMP_JGE_K:
429 if (A >= K) { 446 if (DST >= IMM) {
430 insn += insn->off; 447 insn += insn->off;
431 CONT_JMP; 448 CONT_JMP;
432 } 449 }
433 CONT; 450 CONT;
434 BPF_JMP_BPF_JSGT_BPF_X: 451 JMP_JSGT_X:
435 if (((s64)A) > ((s64)X)) { 452 if (((s64) DST) > ((s64) SRC)) {
436 insn += insn->off; 453 insn += insn->off;
437 CONT_JMP; 454 CONT_JMP;
438 } 455 }
439 CONT; 456 CONT;
440 BPF_JMP_BPF_JSGT_BPF_K: 457 JMP_JSGT_K:
441 if (((s64)A) > ((s64)K)) { 458 if (((s64) DST) > ((s64) IMM)) {
442 insn += insn->off; 459 insn += insn->off;
443 CONT_JMP; 460 CONT_JMP;
444 } 461 }
445 CONT; 462 CONT;
446 BPF_JMP_BPF_JSGE_BPF_X: 463 JMP_JSGE_X:
447 if (((s64)A) >= ((s64)X)) { 464 if (((s64) DST) >= ((s64) SRC)) {
448 insn += insn->off; 465 insn += insn->off;
449 CONT_JMP; 466 CONT_JMP;
450 } 467 }
451 CONT; 468 CONT;
452 BPF_JMP_BPF_JSGE_BPF_K: 469 JMP_JSGE_K:
453 if (((s64)A) >= ((s64)K)) { 470 if (((s64) DST) >= ((s64) IMM)) {
454 insn += insn->off; 471 insn += insn->off;
455 CONT_JMP; 472 CONT_JMP;
456 } 473 }
457 CONT; 474 CONT;
458 BPF_JMP_BPF_JSET_BPF_X: 475 JMP_JSET_X:
459 if (A & X) { 476 if (DST & SRC) {
460 insn += insn->off; 477 insn += insn->off;
461 CONT_JMP; 478 CONT_JMP;
462 } 479 }
463 CONT; 480 CONT;
464 BPF_JMP_BPF_JSET_BPF_K: 481 JMP_JSET_K:
465 if (A & K) { 482 if (DST & IMM) {
466 insn += insn->off; 483 insn += insn->off;
467 CONT_JMP; 484 CONT_JMP;
468 } 485 }
469 CONT; 486 CONT;
470 BPF_JMP_BPF_EXIT_0: 487 JMP_EXIT:
471 return R0; 488 return BPF_R0;
472 489
473 /* STX and ST and LDX*/ 490 /* STX and ST and LDX*/
474#define LDST(SIZEOP, SIZE) \ 491#define LDST(SIZEOP, SIZE) \
475 BPF_STX_BPF_MEM_##SIZEOP: \ 492 STX_MEM_##SIZEOP: \
476 *(SIZE *)(unsigned long) (A + insn->off) = X; \ 493 *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \
477 CONT; \ 494 CONT; \
478 BPF_ST_BPF_MEM_##SIZEOP: \ 495 ST_MEM_##SIZEOP: \
479 *(SIZE *)(unsigned long) (A + insn->off) = K; \ 496 *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \
480 CONT; \ 497 CONT; \
481 BPF_LDX_BPF_MEM_##SIZEOP: \ 498 LDX_MEM_##SIZEOP: \
482 A = *(SIZE *)(unsigned long) (X + insn->off); \ 499 DST = *(SIZE *)(unsigned long) (SRC + insn->off); \
483 CONT; 500 CONT;
484 501
485 LDST(BPF_B, u8) 502 LDST(B, u8)
486 LDST(BPF_H, u16) 503 LDST(H, u16)
487 LDST(BPF_W, u32) 504 LDST(W, u32)
488 LDST(BPF_DW, u64) 505 LDST(DW, u64)
489#undef LDST 506#undef LDST
490 BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */ 507 STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
491 atomic_add((u32) X, (atomic_t *)(unsigned long) 508 atomic_add((u32) SRC, (atomic_t *)(unsigned long)
492 (A + insn->off)); 509 (DST + insn->off));
493 CONT; 510 CONT;
494 BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ 511 STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
495 atomic64_add((u64) X, (atomic64_t *)(unsigned long) 512 atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
496 (A + insn->off)); 513 (DST + insn->off));
497 CONT; 514 CONT;
498 BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */ 515 LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
499 off = K; 516 off = IMM;
500load_word: 517load_word:
501 /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only 518 /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
502 * appearing in the programs where ctx == skb. All programs 519 * only appearing in the programs where ctx ==
503 * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter() 520 * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
504 * saves it in R6, internal BPF verifier will check that 521 * == BPF_R6, sk_convert_filter() saves it in BPF_R6,
505 * R6 == ctx. 522 * internal BPF verifier will check that BPF_R6 ==
523 * ctx.
506 * 524 *
507 * BPF_ABS and BPF_IND are wrappers of function calls, so 525 * BPF_ABS and BPF_IND are wrappers of function calls,
508 * they scratch R1-R5 registers, preserve R6-R9, and store 526 * so they scratch BPF_R1-BPF_R5 registers, preserve
509 * return value into R0. 527 * BPF_R6-BPF_R9, and store return value into BPF_R0.
510 * 528 *
511 * Implicit input: 529 * Implicit input:
512 * ctx 530 * ctx == skb == BPF_R6 == CTX
513 * 531 *
514 * Explicit input: 532 * Explicit input:
515 * X == any register 533 * SRC == any register
516 * K == 32-bit immediate 534 * IMM == 32-bit immediate
517 * 535 *
518 * Output: 536 * Output:
519 * R0 - 8/16/32-bit skb data converted to cpu endianness 537 * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
520 */ 538 */
521 ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); 539
540 ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
522 if (likely(ptr != NULL)) { 541 if (likely(ptr != NULL)) {
523 R0 = get_unaligned_be32(ptr); 542 BPF_R0 = get_unaligned_be32(ptr);
524 CONT; 543 CONT;
525 } 544 }
545
526 return 0; 546 return 0;
527 BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */ 547 LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
528 off = K; 548 off = IMM;
529load_half: 549load_half:
530 ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); 550 ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
531 if (likely(ptr != NULL)) { 551 if (likely(ptr != NULL)) {
532 R0 = get_unaligned_be16(ptr); 552 BPF_R0 = get_unaligned_be16(ptr);
533 CONT; 553 CONT;
534 } 554 }
555
535 return 0; 556 return 0;
536 BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */ 557 LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
537 off = K; 558 off = IMM;
538load_byte: 559load_byte:
539 ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); 560 ptr = load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
540 if (likely(ptr != NULL)) { 561 if (likely(ptr != NULL)) {
541 R0 = *(u8 *)ptr; 562 BPF_R0 = *(u8 *)ptr;
542 CONT; 563 CONT;
543 } 564 }
565
544 return 0; 566 return 0;
545 BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */ 567 LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
546 off = K + X; 568 off = IMM + SRC;
547 goto load_word; 569 goto load_word;
548 BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */ 570 LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
549 off = K + X; 571 off = IMM + SRC;
550 goto load_half; 572 goto load_half;
551 BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */ 573 LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
552 off = K + X; 574 off = IMM + SRC;
553 goto load_byte; 575 goto load_byte;
554 576
555 default_label: 577 default_label:
556 /* If we ever reach this, we have a bug somewhere. */ 578 /* If we ever reach this, we have a bug somewhere. */
557 WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code); 579 WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
558 return 0; 580 return 0;
559#undef CONT_JMP
560#undef CONT
561
562#undef R0
563#undef X
564#undef A
565#undef K
566} 581}
567 582
568u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
569 const struct sock_filter_int *insni)
570 __attribute__ ((alias ("__sk_run_filter")));
571
572u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
573 const struct sock_filter_int *insni)
574 __attribute__ ((alias ("__sk_run_filter")));
575EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
576
577/* Helper to find the offset of pkt_type in sk_buff structure. We want 583/* Helper to find the offset of pkt_type in sk_buff structure. We want
578 * to make sure its still a 3bit field starting at a byte boundary; 584 * to make sure its still a 3bit field starting at a byte boundary;
579 * taken from arch/x86/net/bpf_jit_comp.c. 585 * taken from arch/x86/net/bpf_jit_comp.c.
580 */ 586 */
587#ifdef __BIG_ENDIAN_BITFIELD
588#define PKT_TYPE_MAX (7 << 5)
589#else
581#define PKT_TYPE_MAX 7 590#define PKT_TYPE_MAX 7
591#endif
582static unsigned int pkt_type_offset(void) 592static unsigned int pkt_type_offset(void)
583{ 593{
584 struct sk_buff skb_probe = { .pkt_type = ~0, }; 594 struct sk_buff skb_probe = { .pkt_type = ~0, };
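The rewritten interpreter above dispatches with the GCC/clang "labels as values" extension: the jumptable maps every possible opcode byte to a label, unknown opcodes fall through to default_label, and CONT advances insn and jumps back through the table instead of looping over a switch. A tiny standalone sketch of the same dispatch idea (userspace C, not kernel code):

/* Illustration of jumptable/computed-goto dispatch as used by __sk_run_filter. */
#include <stdio.h>

enum { OP_INC, OP_DEC, OP_HALT };

static int run(const unsigned char *prog)
{
	static const void *jumptable[256] = {
		[0 ... 255] = &&do_halt,	/* default for unknown opcodes */
		[OP_INC] = &&do_inc,
		[OP_DEC] = &&do_dec,
		[OP_HALT] = &&do_halt,
	};
	int acc = 0;

#define CONT ({ prog++; goto select_insn; })

select_insn:
	goto *jumptable[*prog];
do_inc:
	acc++;
	CONT;
do_dec:
	acc--;
	CONT;
do_halt:
	return acc;
#undef CONT
}

int main(void)
{
	const unsigned char prog[] = { OP_INC, OP_INC, OP_DEC, OP_INC, OP_HALT };

	printf("%d\n", run(prog));	/* prints 2 */
	return 0;
}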
@@ -594,16 +604,14 @@ static unsigned int pkt_type_offset(void)
594 return -1; 604 return -1;
595} 605}
596 606
597static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) 607static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
598{ 608{
599 struct sk_buff *skb = (struct sk_buff *)(long) ctx; 609 return __skb_get_poff((struct sk_buff *)(unsigned long) ctx);
600
601 return __skb_get_poff(skb);
602} 610}
603 611
604static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) 612static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
605{ 613{
606 struct sk_buff *skb = (struct sk_buff *)(long) ctx; 614 struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
607 struct nlattr *nla; 615 struct nlattr *nla;
608 616
609 if (skb_is_nonlinear(skb)) 617 if (skb_is_nonlinear(skb))
@@ -612,19 +620,19 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
612 if (skb->len < sizeof(struct nlattr)) 620 if (skb->len < sizeof(struct nlattr))
613 return 0; 621 return 0;
614 622
615 if (A > skb->len - sizeof(struct nlattr)) 623 if (a > skb->len - sizeof(struct nlattr))
616 return 0; 624 return 0;
617 625
618 nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X); 626 nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
619 if (nla) 627 if (nla)
620 return (void *) nla - (void *) skb->data; 628 return (void *) nla - (void *) skb->data;
621 629
622 return 0; 630 return 0;
623} 631}
624 632
625static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) 633static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
626{ 634{
627 struct sk_buff *skb = (struct sk_buff *)(long) ctx; 635 struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
628 struct nlattr *nla; 636 struct nlattr *nla;
629 637
630 if (skb_is_nonlinear(skb)) 638 if (skb_is_nonlinear(skb))
@@ -633,25 +641,31 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
633 if (skb->len < sizeof(struct nlattr)) 641 if (skb->len < sizeof(struct nlattr))
634 return 0; 642 return 0;
635 643
636 if (A > skb->len - sizeof(struct nlattr)) 644 if (a > skb->len - sizeof(struct nlattr))
637 return 0; 645 return 0;
638 646
639 nla = (struct nlattr *) &skb->data[A]; 647 nla = (struct nlattr *) &skb->data[a];
640 if (nla->nla_len > skb->len - A) 648 if (nla->nla_len > skb->len - a)
641 return 0; 649 return 0;
642 650
643 nla = nla_find_nested(nla, X); 651 nla = nla_find_nested(nla, x);
644 if (nla) 652 if (nla)
645 return (void *) nla - (void *) skb->data; 653 return (void *) nla - (void *) skb->data;
646 654
647 return 0; 655 return 0;
648} 656}
649 657
650static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) 658static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
651{ 659{
652 return raw_smp_processor_id(); 660 return raw_smp_processor_id();
653} 661}
654 662
663/* note that this only generates 32-bit random numbers */
664static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
665{
666 return prandom_u32();
667}
668
655static bool convert_bpf_extensions(struct sock_filter *fp, 669static bool convert_bpf_extensions(struct sock_filter *fp,
656 struct sock_filter_int **insnp) 670 struct sock_filter_int **insnp)
657{ 671{
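__get_random_u32() is the helper behind the new SKF_AD_RANDOM ancillary load handled further down in convert_bpf_extensions(): a classic BPF filter that reads SKF_AD_OFF + SKF_AD_RANDOM gets a prandom_u32() value, which is useful for probabilistic packet sampling. A hedged userspace sketch that keeps roughly one packet in four; socket creation and error handling are elided, and it assumes a kernel carrying this patch set.

/* Sketch: attach a classic BPF filter that samples ~1/4 of packets by
 * loading the new "random" ancillary value.
 */
#include <linux/filter.h>
#include <sys/socket.h>

static int attach_sampling_filter(int fd)
{
	struct sock_filter insns[] = {
		/* A = prandom_u32() */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_RANDOM),
		/* A &= 3 */
		BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 3),
		/* if (A == 0) accept, else drop */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),	/* accept whole packet */
		BPF_STMT(BPF_RET | BPF_K, 0),		/* drop */
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
}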
@@ -661,119 +675,83 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
661 case SKF_AD_OFF + SKF_AD_PROTOCOL: 675 case SKF_AD_OFF + SKF_AD_PROTOCOL:
662 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); 676 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
663 677
664 insn->code = BPF_LDX | BPF_MEM | BPF_H; 678 /* A = *(u16 *) (CTX + offsetof(protocol)) */
665 insn->a_reg = A_REG; 679 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
666 insn->x_reg = CTX_REG; 680 offsetof(struct sk_buff, protocol));
667 insn->off = offsetof(struct sk_buff, protocol);
668 insn++;
669
670 /* A = ntohs(A) [emitting a nop or swap16] */ 681 /* A = ntohs(A) [emitting a nop or swap16] */
671 insn->code = BPF_ALU | BPF_END | BPF_FROM_BE; 682 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
672 insn->a_reg = A_REG;
673 insn->imm = 16;
674 break; 683 break;
675 684
676 case SKF_AD_OFF + SKF_AD_PKTTYPE: 685 case SKF_AD_OFF + SKF_AD_PKTTYPE:
677 insn->code = BPF_LDX | BPF_MEM | BPF_B; 686 *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
678 insn->a_reg = A_REG; 687 pkt_type_offset());
679 insn->x_reg = CTX_REG;
680 insn->off = pkt_type_offset();
681 if (insn->off < 0) 688 if (insn->off < 0)
682 return false; 689 return false;
683 insn++; 690 insn++;
684 691 *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
685 insn->code = BPF_ALU | BPF_AND | BPF_K; 692#ifdef __BIG_ENDIAN_BITFIELD
686 insn->a_reg = A_REG; 693 insn++;
687 insn->imm = PKT_TYPE_MAX; 694 *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
695#endif
688 break; 696 break;
689 697
690 case SKF_AD_OFF + SKF_AD_IFINDEX: 698 case SKF_AD_OFF + SKF_AD_IFINDEX:
691 case SKF_AD_OFF + SKF_AD_HATYPE: 699 case SKF_AD_OFF + SKF_AD_HATYPE:
692 if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
693 insn->code = BPF_LDX | BPF_MEM | BPF_DW;
694 else
695 insn->code = BPF_LDX | BPF_MEM | BPF_W;
696 insn->a_reg = TMP_REG;
697 insn->x_reg = CTX_REG;
698 insn->off = offsetof(struct sk_buff, dev);
699 insn++;
700
701 insn->code = BPF_JMP | BPF_JNE | BPF_K;
702 insn->a_reg = TMP_REG;
703 insn->imm = 0;
704 insn->off = 1;
705 insn++;
706
707 insn->code = BPF_JMP | BPF_EXIT;
708 insn++;
709
710 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); 700 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
711 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); 701 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
712 702 BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
713 insn->a_reg = A_REG; 703
714 insn->x_reg = TMP_REG; 704 *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
715 705 BPF_REG_TMP, BPF_REG_CTX,
716 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) { 706 offsetof(struct sk_buff, dev));
717 insn->code = BPF_LDX | BPF_MEM | BPF_W; 707 /* if (tmp != 0) goto pc + 1 */
718 insn->off = offsetof(struct net_device, ifindex); 708 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
719 } else { 709 *insn++ = BPF_EXIT_INSN();
720 insn->code = BPF_LDX | BPF_MEM | BPF_H; 710 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
721 insn->off = offsetof(struct net_device, type); 711 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
722 } 712 offsetof(struct net_device, ifindex));
713 else
714 *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
715 offsetof(struct net_device, type));
723 break; 716 break;
724 717
725 case SKF_AD_OFF + SKF_AD_MARK: 718 case SKF_AD_OFF + SKF_AD_MARK:
726 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); 719 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
727 720
728 insn->code = BPF_LDX | BPF_MEM | BPF_W; 721 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
729 insn->a_reg = A_REG; 722 offsetof(struct sk_buff, mark));
730 insn->x_reg = CTX_REG;
731 insn->off = offsetof(struct sk_buff, mark);
732 break; 723 break;
733 724
734 case SKF_AD_OFF + SKF_AD_RXHASH: 725 case SKF_AD_OFF + SKF_AD_RXHASH:
735 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); 726 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
736 727
737 insn->code = BPF_LDX | BPF_MEM | BPF_W; 728 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
738 insn->a_reg = A_REG; 729 offsetof(struct sk_buff, hash));
739 insn->x_reg = CTX_REG;
740 insn->off = offsetof(struct sk_buff, hash);
741 break; 730 break;
742 731
743 case SKF_AD_OFF + SKF_AD_QUEUE: 732 case SKF_AD_OFF + SKF_AD_QUEUE:
744 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); 733 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
745 734
746 insn->code = BPF_LDX | BPF_MEM | BPF_H; 735 *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
747 insn->a_reg = A_REG; 736 offsetof(struct sk_buff, queue_mapping));
748 insn->x_reg = CTX_REG;
749 insn->off = offsetof(struct sk_buff, queue_mapping);
750 break; 737 break;
751 738
752 case SKF_AD_OFF + SKF_AD_VLAN_TAG: 739 case SKF_AD_OFF + SKF_AD_VLAN_TAG:
753 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT: 740 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
754 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); 741 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
755
756 insn->code = BPF_LDX | BPF_MEM | BPF_H;
757 insn->a_reg = A_REG;
758 insn->x_reg = CTX_REG;
759 insn->off = offsetof(struct sk_buff, vlan_tci);
760 insn++;
761
762 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); 742 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
763 743
744 /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
745 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
746 offsetof(struct sk_buff, vlan_tci));
764 if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { 747 if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
765 insn->code = BPF_ALU | BPF_AND | BPF_K; 748 *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
766 insn->a_reg = A_REG; 749 ~VLAN_TAG_PRESENT);
767 insn->imm = ~VLAN_TAG_PRESENT;
768 } else { 750 } else {
769 insn->code = BPF_ALU | BPF_RSH | BPF_K; 751 /* A >>= 12 */
770 insn->a_reg = A_REG; 752 *insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
771 insn->imm = 12; 753 /* A &= 1 */
772 insn++; 754 *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
773
774 insn->code = BPF_ALU | BPF_AND | BPF_K;
775 insn->a_reg = A_REG;
776 insn->imm = 1;
777 } 755 }
778 break; 756 break;
779 757
@@ -781,46 +759,36 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
781 case SKF_AD_OFF + SKF_AD_NLATTR: 759 case SKF_AD_OFF + SKF_AD_NLATTR:
782 case SKF_AD_OFF + SKF_AD_NLATTR_NEST: 760 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
783 case SKF_AD_OFF + SKF_AD_CPU: 761 case SKF_AD_OFF + SKF_AD_CPU:
784 /* arg1 = ctx */ 762 case SKF_AD_OFF + SKF_AD_RANDOM:
785 insn->code = BPF_ALU64 | BPF_MOV | BPF_X; 763 /* arg1 = CTX */
786 insn->a_reg = ARG1_REG; 764 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
787 insn->x_reg = CTX_REG;
788 insn++;
789
790 /* arg2 = A */ 765 /* arg2 = A */
791 insn->code = BPF_ALU64 | BPF_MOV | BPF_X; 766 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
792 insn->a_reg = ARG2_REG;
793 insn->x_reg = A_REG;
794 insn++;
795
796 /* arg3 = X */ 767 /* arg3 = X */
797 insn->code = BPF_ALU64 | BPF_MOV | BPF_X; 768 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
798 insn->a_reg = ARG3_REG; 769 /* Emit call(arg1=CTX, arg2=A, arg3=X) */
799 insn->x_reg = X_REG;
800 insn++;
801
802 /* Emit call(ctx, arg2=A, arg3=X) */
803 insn->code = BPF_JMP | BPF_CALL;
804 switch (fp->k) { 770 switch (fp->k) {
805 case SKF_AD_OFF + SKF_AD_PAY_OFFSET: 771 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
806 insn->imm = __skb_get_pay_offset - __bpf_call_base; 772 *insn = BPF_EMIT_CALL(__skb_get_pay_offset);
807 break; 773 break;
808 case SKF_AD_OFF + SKF_AD_NLATTR: 774 case SKF_AD_OFF + SKF_AD_NLATTR:
809 insn->imm = __skb_get_nlattr - __bpf_call_base; 775 *insn = BPF_EMIT_CALL(__skb_get_nlattr);
810 break; 776 break;
811 case SKF_AD_OFF + SKF_AD_NLATTR_NEST: 777 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
812 insn->imm = __skb_get_nlattr_nest - __bpf_call_base; 778 *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
813 break; 779 break;
814 case SKF_AD_OFF + SKF_AD_CPU: 780 case SKF_AD_OFF + SKF_AD_CPU:
815 insn->imm = __get_raw_cpu_id - __bpf_call_base; 781 *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
782 break;
783 case SKF_AD_OFF + SKF_AD_RANDOM:
784 *insn = BPF_EMIT_CALL(__get_random_u32);
816 break; 785 break;
817 } 786 }
818 break; 787 break;
819 788
820 case SKF_AD_OFF + SKF_AD_ALU_XOR_X: 789 case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
821 insn->code = BPF_ALU | BPF_XOR | BPF_X; 790 /* A ^= X */
822 insn->a_reg = A_REG; 791 *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
823 insn->x_reg = X_REG;
824 break; 792 break;
825 793
826 default: 794 default:
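The BPF_LDX_MEM(), BPF_ALU32_IMM(), BPF_MOV64_REG() and BPF_EMIT_CALL() helpers used above each emit one complete internal BPF instruction in place of the four or five open-coded field assignments they replace. As a rough illustration (the field names follow the dst_reg/src_reg naming visible further down in this patch and should be read as an assumption about the macro definitions, which live outside this diff), the protocol load

	*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
			      offsetof(struct sk_buff, protocol));

expands to roughly

	*insn++ = (struct sock_filter_int) {
		.code    = BPF_LDX | BPF_MEM | BPF_H,
		.dst_reg = BPF_REG_A,
		.src_reg = BPF_REG_CTX,
		.off     = offsetof(struct sk_buff, protocol),
		.imm     = 0,
	};

so every "*insn = ..." or "*insn++ = ..." statement corresponds to exactly one instruction slot in the converted program.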
@@ -870,7 +838,7 @@ int sk_convert_filter(struct sock_filter *prog, int len,
870 u8 bpf_src; 838 u8 bpf_src;
871 839
872 BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK); 840 BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
873 BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG); 841 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
874 842
875 if (len <= 0 || len >= BPF_MAXINSNS) 843 if (len <= 0 || len >= BPF_MAXINSNS)
876 return -EINVAL; 844 return -EINVAL;
@@ -885,11 +853,8 @@ do_pass:
885 new_insn = new_prog; 853 new_insn = new_prog;
886 fp = prog; 854 fp = prog;
887 855
888 if (new_insn) { 856 if (new_insn)
889 new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X; 857 *new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
890 new_insn->a_reg = CTX_REG;
891 new_insn->x_reg = ARG1_REG;
892 }
893 new_insn++; 858 new_insn++;
894 859
895 for (i = 0; i < len; fp++, i++) { 860 for (i = 0; i < len; fp++, i++) {
@@ -937,17 +902,16 @@ do_pass:
937 convert_bpf_extensions(fp, &insn)) 902 convert_bpf_extensions(fp, &insn))
938 break; 903 break;
939 904
940 insn->code = fp->code; 905 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
941 insn->a_reg = A_REG;
942 insn->x_reg = X_REG;
943 insn->imm = fp->k;
944 break; 906 break;
945 907
946 /* Jump opcodes map as-is, but offsets need adjustment. */ 908 /* Jump transformation cannot use BPF block macros
947 case BPF_JMP | BPF_JA: 909 * everywhere as offset calculation and target updates
948 target = i + fp->k + 1; 910 * require a bit more work than the rest, i.e. jump
949 insn->code = fp->code; 911 * opcodes map as-is, but offsets need adjustment.
950#define EMIT_JMP \ 912 */
913
914#define BPF_EMIT_JMP \
951 do { \ 915 do { \
952 if (target >= len || target < 0) \ 916 if (target >= len || target < 0) \
953 goto err; \ 917 goto err; \
@@ -956,7 +920,10 @@ do_pass:
956 insn->off -= insn - tmp_insns; \ 920 insn->off -= insn - tmp_insns; \
957 } while (0) 921 } while (0)
958 922
959 EMIT_JMP; 923 case BPF_JMP | BPF_JA:
924 target = i + fp->k + 1;
925 insn->code = fp->code;
926 BPF_EMIT_JMP;
960 break; 927 break;
961 928
962 case BPF_JMP | BPF_JEQ | BPF_K: 929 case BPF_JMP | BPF_JEQ | BPF_K:
@@ -972,17 +939,14 @@ do_pass:
972 * immediate into tmp register and use it 939 * immediate into tmp register and use it
973 * in compare insn. 940 * in compare insn.
974 */ 941 */
975 insn->code = BPF_ALU | BPF_MOV | BPF_K; 942 *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
976 insn->a_reg = TMP_REG;
977 insn->imm = fp->k;
978 insn++;
979 943
980 insn->a_reg = A_REG; 944 insn->dst_reg = BPF_REG_A;
981 insn->x_reg = TMP_REG; 945 insn->src_reg = BPF_REG_TMP;
982 bpf_src = BPF_X; 946 bpf_src = BPF_X;
983 } else { 947 } else {
984 insn->a_reg = A_REG; 948 insn->dst_reg = BPF_REG_A;
985 insn->x_reg = X_REG; 949 insn->src_reg = BPF_REG_X;
986 insn->imm = fp->k; 950 insn->imm = fp->k;
987 bpf_src = BPF_SRC(fp->code); 951 bpf_src = BPF_SRC(fp->code);
988 } 952 }
@@ -991,7 +955,7 @@ do_pass:
991 if (fp->jf == 0) { 955 if (fp->jf == 0) {
992 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; 956 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
993 target = i + fp->jt + 1; 957 target = i + fp->jt + 1;
994 EMIT_JMP; 958 BPF_EMIT_JMP;
995 break; 959 break;
996 } 960 }
997 961
@@ -999,127 +963,94 @@ do_pass:
999 if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { 963 if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
1000 insn->code = BPF_JMP | BPF_JNE | bpf_src; 964 insn->code = BPF_JMP | BPF_JNE | bpf_src;
1001 target = i + fp->jf + 1; 965 target = i + fp->jf + 1;
1002 EMIT_JMP; 966 BPF_EMIT_JMP;
1003 break; 967 break;
1004 } 968 }
1005 969
1006 /* Other jumps are mapped into two insns: Jxx and JA. */ 970 /* Other jumps are mapped into two insns: Jxx and JA. */
1007 target = i + fp->jt + 1; 971 target = i + fp->jt + 1;
1008 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; 972 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
1009 EMIT_JMP; 973 BPF_EMIT_JMP;
1010 insn++; 974 insn++;
1011 975
1012 insn->code = BPF_JMP | BPF_JA; 976 insn->code = BPF_JMP | BPF_JA;
1013 target = i + fp->jf + 1; 977 target = i + fp->jf + 1;
1014 EMIT_JMP; 978 BPF_EMIT_JMP;
1015 break; 979 break;
1016 980
1017 /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */ 981 /* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
1018 case BPF_LDX | BPF_MSH | BPF_B: 982 case BPF_LDX | BPF_MSH | BPF_B:
1019 insn->code = BPF_ALU64 | BPF_MOV | BPF_X; 983 /* tmp = A */
1020 insn->a_reg = TMP_REG; 984 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
1021 insn->x_reg = A_REG; 985 /* A = BPF_R0 = *(u8 *) (skb->data + K) */
1022 insn++; 986 *insn++ = BPF_LD_ABS(BPF_B, fp->k);
1023 987 /* A &= 0xf */
1024 insn->code = BPF_LD | BPF_ABS | BPF_B; 988 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
1025 insn->a_reg = A_REG; 989 /* A <<= 2 */
1026 insn->imm = fp->k; 990 *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
1027 insn++; 991 /* X = A */
1028 992 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
1029 insn->code = BPF_ALU | BPF_AND | BPF_K; 993 /* A = tmp */
1030 insn->a_reg = A_REG; 994 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
1031 insn->imm = 0xf;
1032 insn++;
1033
1034 insn->code = BPF_ALU | BPF_LSH | BPF_K;
1035 insn->a_reg = A_REG;
1036 insn->imm = 2;
1037 insn++;
1038
1039 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
1040 insn->a_reg = X_REG;
1041 insn->x_reg = A_REG;
1042 insn++;
1043
1044 insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
1045 insn->a_reg = A_REG;
1046 insn->x_reg = TMP_REG;
1047 break; 995 break;
1048 996
1049 /* RET_K, RET_A are remaped into 2 insns. */ 997 /* RET_K, RET_A are remaped into 2 insns. */
1050 case BPF_RET | BPF_A: 998 case BPF_RET | BPF_A:
1051 case BPF_RET | BPF_K: 999 case BPF_RET | BPF_K:
1052 insn->code = BPF_ALU | BPF_MOV | 1000 *insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
1053 (BPF_RVAL(fp->code) == BPF_K ? 1001 BPF_K : BPF_X, BPF_REG_0,
1054 BPF_K : BPF_X); 1002 BPF_REG_A, fp->k);
1055 insn->a_reg = 0; 1003 *insn = BPF_EXIT_INSN();
1056 insn->x_reg = A_REG;
1057 insn->imm = fp->k;
1058 insn++;
1059
1060 insn->code = BPF_JMP | BPF_EXIT;
1061 break; 1004 break;
1062 1005
1063 /* Store to stack. */ 1006 /* Store to stack. */
1064 case BPF_ST: 1007 case BPF_ST:
1065 case BPF_STX: 1008 case BPF_STX:
1066 insn->code = BPF_STX | BPF_MEM | BPF_W; 1009 *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
1067 insn->a_reg = FP_REG; 1010 BPF_ST ? BPF_REG_A : BPF_REG_X,
1068 insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG; 1011 -(BPF_MEMWORDS - fp->k) * 4);
1069 insn->off = -(BPF_MEMWORDS - fp->k) * 4;
1070 break; 1012 break;
1071 1013
1072 /* Load from stack. */ 1014 /* Load from stack. */
1073 case BPF_LD | BPF_MEM: 1015 case BPF_LD | BPF_MEM:
1074 case BPF_LDX | BPF_MEM: 1016 case BPF_LDX | BPF_MEM:
1075 insn->code = BPF_LDX | BPF_MEM | BPF_W; 1017 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
1076 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? 1018 BPF_REG_A : BPF_REG_X, BPF_REG_FP,
1077 A_REG : X_REG; 1019 -(BPF_MEMWORDS - fp->k) * 4);
1078 insn->x_reg = FP_REG;
1079 insn->off = -(BPF_MEMWORDS - fp->k) * 4;
1080 break; 1020 break;
1081 1021
1082 /* A = K or X = K */ 1022 /* A = K or X = K */
1083 case BPF_LD | BPF_IMM: 1023 case BPF_LD | BPF_IMM:
1084 case BPF_LDX | BPF_IMM: 1024 case BPF_LDX | BPF_IMM:
1085 insn->code = BPF_ALU | BPF_MOV | BPF_K; 1025 *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
1086 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? 1026 BPF_REG_A : BPF_REG_X, fp->k);
1087 A_REG : X_REG;
1088 insn->imm = fp->k;
1089 break; 1027 break;
1090 1028
1091 /* X = A */ 1029 /* X = A */
1092 case BPF_MISC | BPF_TAX: 1030 case BPF_MISC | BPF_TAX:
1093 insn->code = BPF_ALU64 | BPF_MOV | BPF_X; 1031 *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
1094 insn->a_reg = X_REG;
1095 insn->x_reg = A_REG;
1096 break; 1032 break;
1097 1033
1098 /* A = X */ 1034 /* A = X */
1099 case BPF_MISC | BPF_TXA: 1035 case BPF_MISC | BPF_TXA:
1100 insn->code = BPF_ALU64 | BPF_MOV | BPF_X; 1036 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
1101 insn->a_reg = A_REG;
1102 insn->x_reg = X_REG;
1103 break; 1037 break;
1104 1038
1105 /* A = skb->len or X = skb->len */ 1039 /* A = skb->len or X = skb->len */
1106 case BPF_LD | BPF_W | BPF_LEN: 1040 case BPF_LD | BPF_W | BPF_LEN:
1107 case BPF_LDX | BPF_W | BPF_LEN: 1041 case BPF_LDX | BPF_W | BPF_LEN:
1108 insn->code = BPF_LDX | BPF_MEM | BPF_W; 1042 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
1109 insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ? 1043 BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
1110 A_REG : X_REG; 1044 offsetof(struct sk_buff, len));
1111 insn->x_reg = CTX_REG;
1112 insn->off = offsetof(struct sk_buff, len);
1113 break; 1045 break;
1114 1046
1115 /* access seccomp_data fields */ 1047 /* Access seccomp_data fields. */
1116 case BPF_LDX | BPF_ABS | BPF_W: 1048 case BPF_LDX | BPF_ABS | BPF_W:
1117 insn->code = BPF_LDX | BPF_MEM | BPF_W; 1049 /* A = *(u32 *) (ctx + K) */
1118 insn->a_reg = A_REG; 1050 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
1119 insn->x_reg = CTX_REG;
1120 insn->off = fp->k;
1121 break; 1051 break;
1122 1052
1053 /* Unkown instruction. */
1123 default: 1054 default:
1124 goto err; 1055 goto err;
1125 } 1056 }
@@ -1128,7 +1059,6 @@ do_pass:
1128 if (new_prog) 1059 if (new_prog)
1129 memcpy(new_insn, tmp_insns, 1060 memcpy(new_insn, tmp_insns,
1130 sizeof(*insn) * (insn - tmp_insns)); 1061 sizeof(*insn) * (insn - tmp_insns));
1131
1132 new_insn += insn - tmp_insns; 1062 new_insn += insn - tmp_insns;
1133 } 1063 }
1134 1064
@@ -1143,7 +1073,6 @@ do_pass:
1143 new_flen = new_insn - new_prog; 1073 new_flen = new_insn - new_prog;
1144 if (pass > 2) 1074 if (pass > 2)
1145 goto err; 1075 goto err;
1146
1147 goto do_pass; 1076 goto do_pass;
1148 } 1077 }
1149 1078
@@ -1167,44 +1096,46 @@ err:
1167 */ 1096 */
1168static int check_load_and_stores(struct sock_filter *filter, int flen) 1097static int check_load_and_stores(struct sock_filter *filter, int flen)
1169{ 1098{
1170 u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */ 1099 u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
1171 int pc, ret = 0; 1100 int pc, ret = 0;
1172 1101
1173 BUILD_BUG_ON(BPF_MEMWORDS > 16); 1102 BUILD_BUG_ON(BPF_MEMWORDS > 16);
1103
1174 masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL); 1104 masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
1175 if (!masks) 1105 if (!masks)
1176 return -ENOMEM; 1106 return -ENOMEM;
1107
1177 memset(masks, 0xff, flen * sizeof(*masks)); 1108 memset(masks, 0xff, flen * sizeof(*masks));
1178 1109
1179 for (pc = 0; pc < flen; pc++) { 1110 for (pc = 0; pc < flen; pc++) {
1180 memvalid &= masks[pc]; 1111 memvalid &= masks[pc];
1181 1112
1182 switch (filter[pc].code) { 1113 switch (filter[pc].code) {
1183 case BPF_S_ST: 1114 case BPF_ST:
1184 case BPF_S_STX: 1115 case BPF_STX:
1185 memvalid |= (1 << filter[pc].k); 1116 memvalid |= (1 << filter[pc].k);
1186 break; 1117 break;
1187 case BPF_S_LD_MEM: 1118 case BPF_LD | BPF_MEM:
1188 case BPF_S_LDX_MEM: 1119 case BPF_LDX | BPF_MEM:
1189 if (!(memvalid & (1 << filter[pc].k))) { 1120 if (!(memvalid & (1 << filter[pc].k))) {
1190 ret = -EINVAL; 1121 ret = -EINVAL;
1191 goto error; 1122 goto error;
1192 } 1123 }
1193 break; 1124 break;
1194 case BPF_S_JMP_JA: 1125 case BPF_JMP | BPF_JA:
1195 /* a jump must set masks on target */ 1126 /* A jump must set masks on target */
1196 masks[pc + 1 + filter[pc].k] &= memvalid; 1127 masks[pc + 1 + filter[pc].k] &= memvalid;
1197 memvalid = ~0; 1128 memvalid = ~0;
1198 break; 1129 break;
1199 case BPF_S_JMP_JEQ_K: 1130 case BPF_JMP | BPF_JEQ | BPF_K:
1200 case BPF_S_JMP_JEQ_X: 1131 case BPF_JMP | BPF_JEQ | BPF_X:
1201 case BPF_S_JMP_JGE_K: 1132 case BPF_JMP | BPF_JGE | BPF_K:
1202 case BPF_S_JMP_JGE_X: 1133 case BPF_JMP | BPF_JGE | BPF_X:
1203 case BPF_S_JMP_JGT_K: 1134 case BPF_JMP | BPF_JGT | BPF_K:
1204 case BPF_S_JMP_JGT_X: 1135 case BPF_JMP | BPF_JGT | BPF_X:
1205 case BPF_S_JMP_JSET_X: 1136 case BPF_JMP | BPF_JSET | BPF_K:
1206 case BPF_S_JMP_JSET_K: 1137 case BPF_JMP | BPF_JSET | BPF_X:
1207 /* a jump must set masks on targets */ 1138 /* A jump must set masks on targets */
1208 masks[pc + 1 + filter[pc].jt] &= memvalid; 1139 masks[pc + 1 + filter[pc].jt] &= memvalid;
1209 masks[pc + 1 + filter[pc].jf] &= memvalid; 1140 masks[pc + 1 + filter[pc].jf] &= memvalid;
1210 memvalid = ~0; 1141 memvalid = ~0;
@@ -1216,6 +1147,72 @@ error:
1216 return ret; 1147 return ret;
1217} 1148}
1218 1149
1150static bool chk_code_allowed(u16 code_to_probe)
1151{
1152 static const bool codes[] = {
1153 /* 32 bit ALU operations */
1154 [BPF_ALU | BPF_ADD | BPF_K] = true,
1155 [BPF_ALU | BPF_ADD | BPF_X] = true,
1156 [BPF_ALU | BPF_SUB | BPF_K] = true,
1157 [BPF_ALU | BPF_SUB | BPF_X] = true,
1158 [BPF_ALU | BPF_MUL | BPF_K] = true,
1159 [BPF_ALU | BPF_MUL | BPF_X] = true,
1160 [BPF_ALU | BPF_DIV | BPF_K] = true,
1161 [BPF_ALU | BPF_DIV | BPF_X] = true,
1162 [BPF_ALU | BPF_MOD | BPF_K] = true,
1163 [BPF_ALU | BPF_MOD | BPF_X] = true,
1164 [BPF_ALU | BPF_AND | BPF_K] = true,
1165 [BPF_ALU | BPF_AND | BPF_X] = true,
1166 [BPF_ALU | BPF_OR | BPF_K] = true,
1167 [BPF_ALU | BPF_OR | BPF_X] = true,
1168 [BPF_ALU | BPF_XOR | BPF_K] = true,
1169 [BPF_ALU | BPF_XOR | BPF_X] = true,
1170 [BPF_ALU | BPF_LSH | BPF_K] = true,
1171 [BPF_ALU | BPF_LSH | BPF_X] = true,
1172 [BPF_ALU | BPF_RSH | BPF_K] = true,
1173 [BPF_ALU | BPF_RSH | BPF_X] = true,
1174 [BPF_ALU | BPF_NEG] = true,
1175 /* Load instructions */
1176 [BPF_LD | BPF_W | BPF_ABS] = true,
1177 [BPF_LD | BPF_H | BPF_ABS] = true,
1178 [BPF_LD | BPF_B | BPF_ABS] = true,
1179 [BPF_LD | BPF_W | BPF_LEN] = true,
1180 [BPF_LD | BPF_W | BPF_IND] = true,
1181 [BPF_LD | BPF_H | BPF_IND] = true,
1182 [BPF_LD | BPF_B | BPF_IND] = true,
1183 [BPF_LD | BPF_IMM] = true,
1184 [BPF_LD | BPF_MEM] = true,
1185 [BPF_LDX | BPF_W | BPF_LEN] = true,
1186 [BPF_LDX | BPF_B | BPF_MSH] = true,
1187 [BPF_LDX | BPF_IMM] = true,
1188 [BPF_LDX | BPF_MEM] = true,
1189 /* Store instructions */
1190 [BPF_ST] = true,
1191 [BPF_STX] = true,
1192 /* Misc instructions */
1193 [BPF_MISC | BPF_TAX] = true,
1194 [BPF_MISC | BPF_TXA] = true,
1195 /* Return instructions */
1196 [BPF_RET | BPF_K] = true,
1197 [BPF_RET | BPF_A] = true,
1198 /* Jump instructions */
1199 [BPF_JMP | BPF_JA] = true,
1200 [BPF_JMP | BPF_JEQ | BPF_K] = true,
1201 [BPF_JMP | BPF_JEQ | BPF_X] = true,
1202 [BPF_JMP | BPF_JGE | BPF_K] = true,
1203 [BPF_JMP | BPF_JGE | BPF_X] = true,
1204 [BPF_JMP | BPF_JGT | BPF_K] = true,
1205 [BPF_JMP | BPF_JGT | BPF_X] = true,
1206 [BPF_JMP | BPF_JSET | BPF_K] = true,
1207 [BPF_JMP | BPF_JSET | BPF_X] = true,
1208 };
1209
1210 if (code_to_probe >= ARRAY_SIZE(codes))
1211 return false;
1212
1213 return codes[code_to_probe];
1214}
1215
1219/** 1216/**
1220 * sk_chk_filter - verify socket filter code 1217 * sk_chk_filter - verify socket filter code
1221 * @filter: filter to verify 1218 * @filter: filter to verify
@@ -1232,153 +1229,76 @@ error:
1232 */ 1229 */
1233int sk_chk_filter(struct sock_filter *filter, unsigned int flen) 1230int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
1234{ 1231{
1235 /*
1236 * Valid instructions are initialized to non-0.
1237 * Invalid instructions are initialized to 0.
1238 */
1239 static const u8 codes[] = {
1240 [BPF_ALU|BPF_ADD|BPF_K] = BPF_S_ALU_ADD_K,
1241 [BPF_ALU|BPF_ADD|BPF_X] = BPF_S_ALU_ADD_X,
1242 [BPF_ALU|BPF_SUB|BPF_K] = BPF_S_ALU_SUB_K,
1243 [BPF_ALU|BPF_SUB|BPF_X] = BPF_S_ALU_SUB_X,
1244 [BPF_ALU|BPF_MUL|BPF_K] = BPF_S_ALU_MUL_K,
1245 [BPF_ALU|BPF_MUL|BPF_X] = BPF_S_ALU_MUL_X,
1246 [BPF_ALU|BPF_DIV|BPF_X] = BPF_S_ALU_DIV_X,
1247 [BPF_ALU|BPF_MOD|BPF_K] = BPF_S_ALU_MOD_K,
1248 [BPF_ALU|BPF_MOD|BPF_X] = BPF_S_ALU_MOD_X,
1249 [BPF_ALU|BPF_AND|BPF_K] = BPF_S_ALU_AND_K,
1250 [BPF_ALU|BPF_AND|BPF_X] = BPF_S_ALU_AND_X,
1251 [BPF_ALU|BPF_OR|BPF_K] = BPF_S_ALU_OR_K,
1252 [BPF_ALU|BPF_OR|BPF_X] = BPF_S_ALU_OR_X,
1253 [BPF_ALU|BPF_XOR|BPF_K] = BPF_S_ALU_XOR_K,
1254 [BPF_ALU|BPF_XOR|BPF_X] = BPF_S_ALU_XOR_X,
1255 [BPF_ALU|BPF_LSH|BPF_K] = BPF_S_ALU_LSH_K,
1256 [BPF_ALU|BPF_LSH|BPF_X] = BPF_S_ALU_LSH_X,
1257 [BPF_ALU|BPF_RSH|BPF_K] = BPF_S_ALU_RSH_K,
1258 [BPF_ALU|BPF_RSH|BPF_X] = BPF_S_ALU_RSH_X,
1259 [BPF_ALU|BPF_NEG] = BPF_S_ALU_NEG,
1260 [BPF_LD|BPF_W|BPF_ABS] = BPF_S_LD_W_ABS,
1261 [BPF_LD|BPF_H|BPF_ABS] = BPF_S_LD_H_ABS,
1262 [BPF_LD|BPF_B|BPF_ABS] = BPF_S_LD_B_ABS,
1263 [BPF_LD|BPF_W|BPF_LEN] = BPF_S_LD_W_LEN,
1264 [BPF_LD|BPF_W|BPF_IND] = BPF_S_LD_W_IND,
1265 [BPF_LD|BPF_H|BPF_IND] = BPF_S_LD_H_IND,
1266 [BPF_LD|BPF_B|BPF_IND] = BPF_S_LD_B_IND,
1267 [BPF_LD|BPF_IMM] = BPF_S_LD_IMM,
1268 [BPF_LDX|BPF_W|BPF_LEN] = BPF_S_LDX_W_LEN,
1269 [BPF_LDX|BPF_B|BPF_MSH] = BPF_S_LDX_B_MSH,
1270 [BPF_LDX|BPF_IMM] = BPF_S_LDX_IMM,
1271 [BPF_MISC|BPF_TAX] = BPF_S_MISC_TAX,
1272 [BPF_MISC|BPF_TXA] = BPF_S_MISC_TXA,
1273 [BPF_RET|BPF_K] = BPF_S_RET_K,
1274 [BPF_RET|BPF_A] = BPF_S_RET_A,
1275 [BPF_ALU|BPF_DIV|BPF_K] = BPF_S_ALU_DIV_K,
1276 [BPF_LD|BPF_MEM] = BPF_S_LD_MEM,
1277 [BPF_LDX|BPF_MEM] = BPF_S_LDX_MEM,
1278 [BPF_ST] = BPF_S_ST,
1279 [BPF_STX] = BPF_S_STX,
1280 [BPF_JMP|BPF_JA] = BPF_S_JMP_JA,
1281 [BPF_JMP|BPF_JEQ|BPF_K] = BPF_S_JMP_JEQ_K,
1282 [BPF_JMP|BPF_JEQ|BPF_X] = BPF_S_JMP_JEQ_X,
1283 [BPF_JMP|BPF_JGE|BPF_K] = BPF_S_JMP_JGE_K,
1284 [BPF_JMP|BPF_JGE|BPF_X] = BPF_S_JMP_JGE_X,
1285 [BPF_JMP|BPF_JGT|BPF_K] = BPF_S_JMP_JGT_K,
1286 [BPF_JMP|BPF_JGT|BPF_X] = BPF_S_JMP_JGT_X,
1287 [BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
1288 [BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
1289 };
1290 int pc;
1291 bool anc_found; 1232 bool anc_found;
1233 int pc;
1292 1234
1293 if (flen == 0 || flen > BPF_MAXINSNS) 1235 if (flen == 0 || flen > BPF_MAXINSNS)
1294 return -EINVAL; 1236 return -EINVAL;
1295 1237
1296 /* check the filter code now */ 1238 /* Check the filter code now */
1297 for (pc = 0; pc < flen; pc++) { 1239 for (pc = 0; pc < flen; pc++) {
1298 struct sock_filter *ftest = &filter[pc]; 1240 struct sock_filter *ftest = &filter[pc];
1299 u16 code = ftest->code;
1300 1241
1301 if (code >= ARRAY_SIZE(codes)) 1242 /* May we actually operate on this code? */
1302 return -EINVAL; 1243 if (!chk_code_allowed(ftest->code))
1303 code = codes[code];
1304 if (!code)
1305 return -EINVAL; 1244 return -EINVAL;
1245
1306 /* Some instructions need special checks */ 1246 /* Some instructions need special checks */
1307 switch (code) { 1247 switch (ftest->code) {
1308 case BPF_S_ALU_DIV_K: 1248 case BPF_ALU | BPF_DIV | BPF_K:
1309 case BPF_S_ALU_MOD_K: 1249 case BPF_ALU | BPF_MOD | BPF_K:
1310 /* check for division by zero */ 1250 /* Check for division by zero */
1311 if (ftest->k == 0) 1251 if (ftest->k == 0)
1312 return -EINVAL; 1252 return -EINVAL;
1313 break; 1253 break;
1314 case BPF_S_LD_MEM: 1254 case BPF_LD | BPF_MEM:
1315 case BPF_S_LDX_MEM: 1255 case BPF_LDX | BPF_MEM:
1316 case BPF_S_ST: 1256 case BPF_ST:
1317 case BPF_S_STX: 1257 case BPF_STX:
1318 /* check for invalid memory addresses */ 1258 /* Check for invalid memory addresses */
1319 if (ftest->k >= BPF_MEMWORDS) 1259 if (ftest->k >= BPF_MEMWORDS)
1320 return -EINVAL; 1260 return -EINVAL;
1321 break; 1261 break;
1322 case BPF_S_JMP_JA: 1262 case BPF_JMP | BPF_JA:
1323 /* 1263 /* Note, the large ftest->k might cause loops.
1324 * Note, the large ftest->k might cause loops.
1325 * Compare this with conditional jumps below, 1264 * Compare this with conditional jumps below,
1326 * where offsets are limited. --ANK (981016) 1265 * where offsets are limited. --ANK (981016)
1327 */ 1266 */
1328 if (ftest->k >= (unsigned int)(flen-pc-1)) 1267 if (ftest->k >= (unsigned int)(flen - pc - 1))
1329 return -EINVAL; 1268 return -EINVAL;
1330 break; 1269 break;
1331 case BPF_S_JMP_JEQ_K: 1270 case BPF_JMP | BPF_JEQ | BPF_K:
1332 case BPF_S_JMP_JEQ_X: 1271 case BPF_JMP | BPF_JEQ | BPF_X:
1333 case BPF_S_JMP_JGE_K: 1272 case BPF_JMP | BPF_JGE | BPF_K:
1334 case BPF_S_JMP_JGE_X: 1273 case BPF_JMP | BPF_JGE | BPF_X:
1335 case BPF_S_JMP_JGT_K: 1274 case BPF_JMP | BPF_JGT | BPF_K:
1336 case BPF_S_JMP_JGT_X: 1275 case BPF_JMP | BPF_JGT | BPF_X:
1337 case BPF_S_JMP_JSET_X: 1276 case BPF_JMP | BPF_JSET | BPF_K:
1338 case BPF_S_JMP_JSET_K: 1277 case BPF_JMP | BPF_JSET | BPF_X:
1339 /* for conditionals both must be safe */ 1278 /* Both conditionals must be safe */
1340 if (pc + ftest->jt + 1 >= flen || 1279 if (pc + ftest->jt + 1 >= flen ||
1341 pc + ftest->jf + 1 >= flen) 1280 pc + ftest->jf + 1 >= flen)
1342 return -EINVAL; 1281 return -EINVAL;
1343 break; 1282 break;
1344 case BPF_S_LD_W_ABS: 1283 case BPF_LD | BPF_W | BPF_ABS:
1345 case BPF_S_LD_H_ABS: 1284 case BPF_LD | BPF_H | BPF_ABS:
1346 case BPF_S_LD_B_ABS: 1285 case BPF_LD | BPF_B | BPF_ABS:
1347 anc_found = false; 1286 anc_found = false;
1348#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE: \ 1287 if (bpf_anc_helper(ftest) & BPF_ANC)
1349 code = BPF_S_ANC_##CODE; \ 1288 anc_found = true;
1350 anc_found = true; \ 1289 /* Ancillary operation unknown or unsupported */
1351 break
1352 switch (ftest->k) {
1353 ANCILLARY(PROTOCOL);
1354 ANCILLARY(PKTTYPE);
1355 ANCILLARY(IFINDEX);
1356 ANCILLARY(NLATTR);
1357 ANCILLARY(NLATTR_NEST);
1358 ANCILLARY(MARK);
1359 ANCILLARY(QUEUE);
1360 ANCILLARY(HATYPE);
1361 ANCILLARY(RXHASH);
1362 ANCILLARY(CPU);
1363 ANCILLARY(ALU_XOR_X);
1364 ANCILLARY(VLAN_TAG);
1365 ANCILLARY(VLAN_TAG_PRESENT);
1366 ANCILLARY(PAY_OFFSET);
1367 }
1368
1369 /* ancillary operation unknown or unsupported */
1370 if (anc_found == false && ftest->k >= SKF_AD_OFF) 1290 if (anc_found == false && ftest->k >= SKF_AD_OFF)
1371 return -EINVAL; 1291 return -EINVAL;
1372 } 1292 }
1373 ftest->code = code;
1374 } 1293 }
1375 1294
1376 /* last instruction must be a RET code */ 1295 /* Last instruction must be a RET code */
1377 switch (filter[flen - 1].code) { 1296 switch (filter[flen - 1].code) {
1378 case BPF_S_RET_K: 1297 case BPF_RET | BPF_K:
1379 case BPF_S_RET_A: 1298 case BPF_RET | BPF_A:
1380 return check_load_and_stores(filter, flen); 1299 return check_load_and_stores(filter, flen);
1381 } 1300 }
1301
1382 return -EINVAL; 1302 return -EINVAL;
1383} 1303}
1384EXPORT_SYMBOL(sk_chk_filter); 1304EXPORT_SYMBOL(sk_chk_filter);
@@ -1423,7 +1343,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
1423 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); 1343 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
1424 1344
1425 sk_release_orig_filter(fp); 1345 sk_release_orig_filter(fp);
1426 bpf_jit_free(fp); 1346 sk_filter_free(fp);
1427} 1347}
1428 1348
1429/** 1349/**
@@ -1461,7 +1381,7 @@ static struct sk_filter *__sk_migrate_realloc(struct sk_filter *fp,
1461 1381
1462 fp_new = sock_kmalloc(sk, len, GFP_KERNEL); 1382 fp_new = sock_kmalloc(sk, len, GFP_KERNEL);
1463 if (fp_new) { 1383 if (fp_new) {
1464 memcpy(fp_new, fp, sizeof(struct sk_filter)); 1384 *fp_new = *fp;
1465 /* As we're kepping orig_prog in fp_new along, 1385 /* As we're kepping orig_prog in fp_new along,
1466 * we need to make sure we're not evicting it 1386 * we need to make sure we're not evicting it
1467 * from the old fp. 1387 * from the old fp.
@@ -1478,7 +1398,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1478{ 1398{
1479 struct sock_filter *old_prog; 1399 struct sock_filter *old_prog;
1480 struct sk_filter *old_fp; 1400 struct sk_filter *old_fp;
1481 int i, err, new_len, old_len = fp->len; 1401 int err, new_len, old_len = fp->len;
1482 1402
1483 /* We are free to overwrite insns et al right here as it 1403 /* We are free to overwrite insns et al right here as it
1484 * won't be used at this point in time anymore internally 1404 * won't be used at this point in time anymore internally
@@ -1488,13 +1408,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1488 BUILD_BUG_ON(sizeof(struct sock_filter) != 1408 BUILD_BUG_ON(sizeof(struct sock_filter) !=
1489 sizeof(struct sock_filter_int)); 1409 sizeof(struct sock_filter_int));
1490 1410
1491 /* For now, we need to unfiddle BPF_S_* identifiers in place.
1492 * This can sooner or later on be subject to removal, e.g. when
1493 * JITs have been converted.
1494 */
1495 for (i = 0; i < fp->len; i++)
1496 sk_decode_filter(&fp->insns[i], &fp->insns[i]);
1497
1498 /* Conversion cannot happen on overlapping memory areas, 1411 /* Conversion cannot happen on overlapping memory areas,
1499 * so we need to keep the user BPF around until the 2nd 1412 * so we need to keep the user BPF around until the 2nd
1500 * pass. At this time, the user BPF is stored in fp->insns. 1413 * pass. At this time, the user BPF is stored in fp->insns.
@@ -1523,7 +1436,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1523 goto out_err_free; 1436 goto out_err_free;
1524 } 1437 }
1525 1438
1526 fp->bpf_func = sk_run_filter_int_skb;
1527 fp->len = new_len; 1439 fp->len = new_len;
1528 1440
1529 /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */ 1441 /* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
@@ -1536,6 +1448,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
1536 */ 1448 */
1537 goto out_err_free; 1449 goto out_err_free;
1538 1450
1451 sk_filter_select_runtime(fp);
1452
1539 kfree(old_prog); 1453 kfree(old_prog);
1540 return fp; 1454 return fp;
1541 1455
@@ -1550,6 +1464,33 @@ out_err:
1550 return ERR_PTR(err); 1464 return ERR_PTR(err);
1551} 1465}
1552 1466
1467void __weak bpf_int_jit_compile(struct sk_filter *prog)
1468{
1469}
1470
1471/**
1472 * sk_filter_select_runtime - select execution runtime for BPF program
1473 * @fp: sk_filter populated with internal BPF program
1474 *
1475 * try to JIT internal BPF program, if JIT is not available select interpreter
1476 * BPF program will be executed via SK_RUN_FILTER() macro
1477 */
1478void sk_filter_select_runtime(struct sk_filter *fp)
1479{
1480 fp->bpf_func = (void *) __sk_run_filter;
1481
1482 /* Probe if internal BPF can be JITed */
1483 bpf_int_jit_compile(fp);
1484}
1485EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
1486
1487/* free internal BPF program */
1488void sk_filter_free(struct sk_filter *fp)
1489{
1490 bpf_jit_free(fp);
1491}
1492EXPORT_SYMBOL_GPL(sk_filter_free);
1493
1553static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp, 1494static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
1554 struct sock *sk) 1495 struct sock *sk)
1555{ 1496{
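sk_filter_select_runtime() first points fp->bpf_func at the interpreter and only then calls bpf_int_jit_compile(), which is deliberately a __weak no-op here so that an architecture JIT can override it. A minimal sketch of such an arch-side override, assuming a hypothetical arch_emit_native_code() helper that is not part of this patch:

	/* Arch override of the __weak stub above; illustrative only. */
	void bpf_int_jit_compile(struct sk_filter *prog)
	{
		void *image;

		/* Try to translate the internal BPF program to native code. */
		image = arch_emit_native_code(prog);	/* hypothetical helper */
		if (image)
			prog->bpf_func = (void *)image;	/* SK_RUN_FILTER() now runs JITed code */
		/* On failure, bpf_func keeps pointing at __sk_run_filter(). */
	}

Since callers only ever dispatch through fp->bpf_func, the interpreter/JIT decision stays invisible to them, and sk_filter_free() tears down either variant via bpf_jit_free().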
@@ -1592,7 +1533,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
1592 * a negative errno code is returned. On success the return is zero. 1533 * a negative errno code is returned. On success the return is zero.
1593 */ 1534 */
1594int sk_unattached_filter_create(struct sk_filter **pfp, 1535int sk_unattached_filter_create(struct sk_filter **pfp,
1595 struct sock_fprog *fprog) 1536 struct sock_fprog_kern *fprog)
1596{ 1537{
1597 unsigned int fsize = sk_filter_proglen(fprog); 1538 unsigned int fsize = sk_filter_proglen(fprog);
1598 struct sk_filter *fp; 1539 struct sk_filter *fp;
@@ -1713,83 +1654,6 @@ int sk_detach_filter(struct sock *sk)
1713} 1654}
1714EXPORT_SYMBOL_GPL(sk_detach_filter); 1655EXPORT_SYMBOL_GPL(sk_detach_filter);
1715 1656
1716void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
1717{
1718 static const u16 decodes[] = {
1719 [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K,
1720 [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X,
1721 [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K,
1722 [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X,
1723 [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K,
1724 [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X,
1725 [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X,
1726 [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K,
1727 [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X,
1728 [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K,
1729 [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X,
1730 [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K,
1731 [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X,
1732 [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K,
1733 [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X,
1734 [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K,
1735 [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X,
1736 [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K,
1737 [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X,
1738 [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG,
1739 [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS,
1740 [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS,
1741 [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS,
1742 [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS,
1743 [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS,
1744 [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS,
1745 [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS,
1746 [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS,
1747 [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS,
1748 [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS,
1749 [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS,
1750 [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS,
1751 [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS,
1752 [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS,
1753 [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS,
1754 [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
1755 [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS,
1756 [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN,
1757 [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND,
1758 [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND,
1759 [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND,
1760 [BPF_S_LD_IMM] = BPF_LD|BPF_IMM,
1761 [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN,
1762 [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH,
1763 [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM,
1764 [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX,
1765 [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA,
1766 [BPF_S_RET_K] = BPF_RET|BPF_K,
1767 [BPF_S_RET_A] = BPF_RET|BPF_A,
1768 [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K,
1769 [BPF_S_LD_MEM] = BPF_LD|BPF_MEM,
1770 [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM,
1771 [BPF_S_ST] = BPF_ST,
1772 [BPF_S_STX] = BPF_STX,
1773 [BPF_S_JMP_JA] = BPF_JMP|BPF_JA,
1774 [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K,
1775 [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X,
1776 [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K,
1777 [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X,
1778 [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K,
1779 [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X,
1780 [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K,
1781 [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X,
1782 };
1783 u16 code;
1784
1785 code = filt->code;
1786
1787 to->code = decodes[code];
1788 to->jt = filt->jt;
1789 to->jf = filt->jf;
1790 to->k = filt->k;
1791}
1792
1793int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, 1657int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
1794 unsigned int len) 1658 unsigned int len)
1795{ 1659{
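With sk_unattached_filter_create() now taking a struct sock_fprog_kern, in-kernel users (the PTP classifier below is one) wrap a classic BPF array as sketched here; the one-instruction accept-all program is purely illustrative and not taken from the patch:

	#include <linux/kernel.h>
	#include <linux/filter.h>

	static struct sk_filter *example_fp;

	static int example_create_filter(void)
	{
		static struct sock_filter insns[] = {
			BPF_STMT(BPF_RET | BPF_K, 0xffff),	/* accept the packet */
		};
		struct sock_fprog_kern prog = {
			.len	= ARRAY_SIZE(insns),
			.filter	= insns,
		};

		/* Runs sk_chk_filter(), converts to internal BPF and picks
		 * interpreter or JIT via sk_filter_select_runtime(). */
		return sk_unattached_filter_create(&example_fp, &prog);
	}

The resulting program is executed with SK_RUN_FILTER(example_fp, skb) and released with sk_unattached_filter_destroy(example_fp).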
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 7c8ffd974961..85b62691f4f2 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -273,7 +273,7 @@ static void cleanup_net(struct work_struct *work)
273{ 273{
274 const struct pernet_operations *ops; 274 const struct pernet_operations *ops;
275 struct net *net, *tmp; 275 struct net *net, *tmp;
276 LIST_HEAD(net_kill_list); 276 struct list_head net_kill_list;
277 LIST_HEAD(net_exit_list); 277 LIST_HEAD(net_exit_list);
278 278
279 /* Atomically snapshot the list of namespaces to cleanup */ 279 /* Atomically snapshot the list of namespaces to cleanup */
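Dropping the LIST_HEAD() initializer for net_kill_list works because the list head is (per the surrounding cleanup_net() code, not shown in full here) filled by list_replace_init(), which writes all of its fields before first use, so the static initialization was redundant. A generic sketch of that pattern, with made-up names:

	#include <linux/list.h>

	static LIST_HEAD(pending);	/* shared list, initialized once */

	static void snapshot_pending(void)
	{
		struct list_head local;	/* no LIST_HEAD(): set up below */

		/* Move everything from 'pending' onto 'local' and leave
		 * 'pending' empty; list_replace_init() fully initializes
		 * 'local', so a prior LIST_HEAD() would be wasted work. */
		list_replace_init(&pending, &local);
	}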
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0304f981f7ff..fc17a9d309ac 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -573,7 +573,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
573 is_zero_ether_addr(pkt_dev->src_mac) ? 573 is_zero_ether_addr(pkt_dev->src_mac) ?
574 pkt_dev->odev->dev_addr : pkt_dev->src_mac); 574 pkt_dev->odev->dev_addr : pkt_dev->src_mac);
575 575
576 seq_printf(seq, "dst_mac: "); 576 seq_puts(seq, "dst_mac: ");
577 seq_printf(seq, "%pM\n", pkt_dev->dst_mac); 577 seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
578 578
579 seq_printf(seq, 579 seq_printf(seq,
@@ -588,7 +588,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
588 588
589 if (pkt_dev->nr_labels) { 589 if (pkt_dev->nr_labels) {
590 unsigned int i; 590 unsigned int i;
591 seq_printf(seq, " mpls: "); 591 seq_puts(seq, " mpls: ");
592 for (i = 0; i < pkt_dev->nr_labels; i++) 592 for (i = 0; i < pkt_dev->nr_labels; i++)
593 seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), 593 seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
594 i == pkt_dev->nr_labels-1 ? "\n" : ", "); 594 i == pkt_dev->nr_labels-1 ? "\n" : ", ");
@@ -613,67 +613,67 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
613 if (pkt_dev->node >= 0) 613 if (pkt_dev->node >= 0)
614 seq_printf(seq, " node: %d\n", pkt_dev->node); 614 seq_printf(seq, " node: %d\n", pkt_dev->node);
615 615
616 seq_printf(seq, " Flags: "); 616 seq_puts(seq, " Flags: ");
617 617
618 if (pkt_dev->flags & F_IPV6) 618 if (pkt_dev->flags & F_IPV6)
619 seq_printf(seq, "IPV6 "); 619 seq_puts(seq, "IPV6 ");
620 620
621 if (pkt_dev->flags & F_IPSRC_RND) 621 if (pkt_dev->flags & F_IPSRC_RND)
622 seq_printf(seq, "IPSRC_RND "); 622 seq_puts(seq, "IPSRC_RND ");
623 623
624 if (pkt_dev->flags & F_IPDST_RND) 624 if (pkt_dev->flags & F_IPDST_RND)
625 seq_printf(seq, "IPDST_RND "); 625 seq_puts(seq, "IPDST_RND ");
626 626
627 if (pkt_dev->flags & F_TXSIZE_RND) 627 if (pkt_dev->flags & F_TXSIZE_RND)
628 seq_printf(seq, "TXSIZE_RND "); 628 seq_puts(seq, "TXSIZE_RND ");
629 629
630 if (pkt_dev->flags & F_UDPSRC_RND) 630 if (pkt_dev->flags & F_UDPSRC_RND)
631 seq_printf(seq, "UDPSRC_RND "); 631 seq_puts(seq, "UDPSRC_RND ");
632 632
633 if (pkt_dev->flags & F_UDPDST_RND) 633 if (pkt_dev->flags & F_UDPDST_RND)
634 seq_printf(seq, "UDPDST_RND "); 634 seq_puts(seq, "UDPDST_RND ");
635 635
636 if (pkt_dev->flags & F_UDPCSUM) 636 if (pkt_dev->flags & F_UDPCSUM)
637 seq_printf(seq, "UDPCSUM "); 637 seq_puts(seq, "UDPCSUM ");
638 638
639 if (pkt_dev->flags & F_MPLS_RND) 639 if (pkt_dev->flags & F_MPLS_RND)
640 seq_printf(seq, "MPLS_RND "); 640 seq_puts(seq, "MPLS_RND ");
641 641
642 if (pkt_dev->flags & F_QUEUE_MAP_RND) 642 if (pkt_dev->flags & F_QUEUE_MAP_RND)
643 seq_printf(seq, "QUEUE_MAP_RND "); 643 seq_puts(seq, "QUEUE_MAP_RND ");
644 644
645 if (pkt_dev->flags & F_QUEUE_MAP_CPU) 645 if (pkt_dev->flags & F_QUEUE_MAP_CPU)
646 seq_printf(seq, "QUEUE_MAP_CPU "); 646 seq_puts(seq, "QUEUE_MAP_CPU ");
647 647
648 if (pkt_dev->cflows) { 648 if (pkt_dev->cflows) {
649 if (pkt_dev->flags & F_FLOW_SEQ) 649 if (pkt_dev->flags & F_FLOW_SEQ)
650 seq_printf(seq, "FLOW_SEQ "); /*in sequence flows*/ 650 seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/
651 else 651 else
652 seq_printf(seq, "FLOW_RND "); 652 seq_puts(seq, "FLOW_RND ");
653 } 653 }
654 654
655#ifdef CONFIG_XFRM 655#ifdef CONFIG_XFRM
656 if (pkt_dev->flags & F_IPSEC_ON) { 656 if (pkt_dev->flags & F_IPSEC_ON) {
657 seq_printf(seq, "IPSEC "); 657 seq_puts(seq, "IPSEC ");
658 if (pkt_dev->spi) 658 if (pkt_dev->spi)
659 seq_printf(seq, "spi:%u", pkt_dev->spi); 659 seq_printf(seq, "spi:%u", pkt_dev->spi);
660 } 660 }
661#endif 661#endif
662 662
663 if (pkt_dev->flags & F_MACSRC_RND) 663 if (pkt_dev->flags & F_MACSRC_RND)
664 seq_printf(seq, "MACSRC_RND "); 664 seq_puts(seq, "MACSRC_RND ");
665 665
666 if (pkt_dev->flags & F_MACDST_RND) 666 if (pkt_dev->flags & F_MACDST_RND)
667 seq_printf(seq, "MACDST_RND "); 667 seq_puts(seq, "MACDST_RND ");
668 668
669 if (pkt_dev->flags & F_VID_RND) 669 if (pkt_dev->flags & F_VID_RND)
670 seq_printf(seq, "VID_RND "); 670 seq_puts(seq, "VID_RND ");
671 671
672 if (pkt_dev->flags & F_SVID_RND) 672 if (pkt_dev->flags & F_SVID_RND)
673 seq_printf(seq, "SVID_RND "); 673 seq_puts(seq, "SVID_RND ");
674 674
675 if (pkt_dev->flags & F_NODE) 675 if (pkt_dev->flags & F_NODE)
676 seq_printf(seq, "NODE_ALLOC "); 676 seq_puts(seq, "NODE_ALLOC ");
677 677
678 seq_puts(seq, "\n"); 678 seq_puts(seq, "\n");
679 679
@@ -716,7 +716,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
716 if (pkt_dev->result[0]) 716 if (pkt_dev->result[0])
717 seq_printf(seq, "Result: %s\n", pkt_dev->result); 717 seq_printf(seq, "Result: %s\n", pkt_dev->result);
718 else 718 else
719 seq_printf(seq, "Result: Idle\n"); 719 seq_puts(seq, "Result: Idle\n");
720 720
721 return 0; 721 return 0;
722} 722}
@@ -1735,14 +1735,14 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
1735 1735
1736 BUG_ON(!t); 1736 BUG_ON(!t);
1737 1737
1738 seq_printf(seq, "Running: "); 1738 seq_puts(seq, "Running: ");
1739 1739
1740 if_lock(t); 1740 if_lock(t);
1741 list_for_each_entry(pkt_dev, &t->if_list, list) 1741 list_for_each_entry(pkt_dev, &t->if_list, list)
1742 if (pkt_dev->running) 1742 if (pkt_dev->running)
1743 seq_printf(seq, "%s ", pkt_dev->odevname); 1743 seq_printf(seq, "%s ", pkt_dev->odevname);
1744 1744
1745 seq_printf(seq, "\nStopped: "); 1745 seq_puts(seq, "\nStopped: ");
1746 1746
1747 list_for_each_entry(pkt_dev, &t->if_list, list) 1747 list_for_each_entry(pkt_dev, &t->if_list, list)
1748 if (!pkt_dev->running) 1748 if (!pkt_dev->running)
@@ -1751,7 +1751,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
1751 if (t->result[0]) 1751 if (t->result[0])
1752 seq_printf(seq, "\nResult: %s\n", t->result); 1752 seq_printf(seq, "\nResult: %s\n", t->result);
1753 else 1753 else
1754 seq_printf(seq, "\nResult: NA\n"); 1754 seq_puts(seq, "\nResult: NA\n");
1755 1755
1756 if_unlock(t); 1756 if_unlock(t);
1757 1757
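The pktgen changes are a mechanical seq_printf() -> seq_puts() conversion for format strings that contain no conversion specifiers, which skips the printf parsing entirely. The pattern, with illustrative names:

	#include <linux/seq_file.h>

	static void show_flags(struct seq_file *seq, unsigned long flags)
	{
		seq_puts(seq, " Flags: ");		/* literal string: seq_puts() */

		if (flags & 0x1)
			seq_puts(seq, "IPV6 ");

		seq_printf(seq, "raw: %#lx\n", flags);	/* formatted output still needs seq_printf() */
	}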
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index eaba0f68f860..d3027a73fd4b 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -88,7 +88,7 @@ EXPORT_SYMBOL_GPL(ptp_classify_raw);
88 88
89void __init ptp_classifier_init(void) 89void __init ptp_classifier_init(void)
90{ 90{
91 static struct sock_filter ptp_filter[] = { 91 static struct sock_filter ptp_filter[] __initdata = {
92 { 0x28, 0, 0, 0x0000000c }, 92 { 0x28, 0, 0, 0x0000000c },
93 { 0x15, 0, 12, 0x00000800 }, 93 { 0x15, 0, 12, 0x00000800 },
94 { 0x30, 0, 0, 0x00000017 }, 94 { 0x30, 0, 0, 0x00000017 },
@@ -133,7 +133,7 @@ void __init ptp_classifier_init(void)
133 { 0x16, 0, 0, 0x00000000 }, 133 { 0x16, 0, 0, 0x00000000 },
134 { 0x06, 0, 0, 0x00000000 }, 134 { 0x06, 0, 0, 0x00000000 },
135 }; 135 };
136 struct sock_fprog ptp_prog = { 136 struct sock_fprog_kern ptp_prog = {
137 .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter, 137 .len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
138 }; 138 };
139 139
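Marking ptp_filter[] as __initdata is safe because ptp_classifier_init() consumes it exactly once at boot (the program is converted and copied by sk_unattached_filter_create()), so the original array can live in the init section and be discarded afterwards. A generic sketch of the pattern, with made-up names:

	#include <linux/init.h>
	#include <linux/kernel.h>
	#include <linux/errno.h>

	static u32 example_table[] __initdata = { 1, 2, 3 };

	static int __init example_init(void)
	{
		int i;
		u32 sum = 0;

		for (i = 0; i < ARRAY_SIZE(example_table); i++)
			sum += example_table[i];	/* memory reclaimed after boot */

		return sum ? 0 : -EINVAL;
	}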
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d8d8fcfa060..1063996f8317 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -798,8 +798,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
798 size += num_vfs * 798 size += num_vfs *
799 (nla_total_size(sizeof(struct ifla_vf_mac)) + 799 (nla_total_size(sizeof(struct ifla_vf_mac)) +
800 nla_total_size(sizeof(struct ifla_vf_vlan)) + 800 nla_total_size(sizeof(struct ifla_vf_vlan)) +
801 nla_total_size(sizeof(struct ifla_vf_tx_rate)) + 801 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
802 nla_total_size(sizeof(struct ifla_vf_spoofchk))); 802 nla_total_size(sizeof(struct ifla_vf_rate)));
803 return size; 803 return size;
804 } else 804 } else
805 return 0; 805 return 0;
@@ -1065,6 +1065,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1065 struct ifla_vf_info ivi; 1065 struct ifla_vf_info ivi;
1066 struct ifla_vf_mac vf_mac; 1066 struct ifla_vf_mac vf_mac;
1067 struct ifla_vf_vlan vf_vlan; 1067 struct ifla_vf_vlan vf_vlan;
1068 struct ifla_vf_rate vf_rate;
1068 struct ifla_vf_tx_rate vf_tx_rate; 1069 struct ifla_vf_tx_rate vf_tx_rate;
1069 struct ifla_vf_spoofchk vf_spoofchk; 1070 struct ifla_vf_spoofchk vf_spoofchk;
1070 struct ifla_vf_link_state vf_linkstate; 1071 struct ifla_vf_link_state vf_linkstate;
@@ -1085,6 +1086,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1085 break; 1086 break;
1086 vf_mac.vf = 1087 vf_mac.vf =
1087 vf_vlan.vf = 1088 vf_vlan.vf =
1089 vf_rate.vf =
1088 vf_tx_rate.vf = 1090 vf_tx_rate.vf =
1089 vf_spoofchk.vf = 1091 vf_spoofchk.vf =
1090 vf_linkstate.vf = ivi.vf; 1092 vf_linkstate.vf = ivi.vf;
@@ -1092,7 +1094,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1092 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); 1094 memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
1093 vf_vlan.vlan = ivi.vlan; 1095 vf_vlan.vlan = ivi.vlan;
1094 vf_vlan.qos = ivi.qos; 1096 vf_vlan.qos = ivi.qos;
1095 vf_tx_rate.rate = ivi.tx_rate; 1097 vf_tx_rate.rate = ivi.max_tx_rate;
1098 vf_rate.min_tx_rate = ivi.min_tx_rate;
1099 vf_rate.max_tx_rate = ivi.max_tx_rate;
1096 vf_spoofchk.setting = ivi.spoofchk; 1100 vf_spoofchk.setting = ivi.spoofchk;
1097 vf_linkstate.link_state = ivi.linkstate; 1101 vf_linkstate.link_state = ivi.linkstate;
1098 vf = nla_nest_start(skb, IFLA_VF_INFO); 1102 vf = nla_nest_start(skb, IFLA_VF_INFO);
@@ -1102,6 +1106,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
1102 } 1106 }
1103 if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || 1107 if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
1104 nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || 1108 nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
1109 nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
1110 &vf_rate) ||
1105 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), 1111 nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
1106 &vf_tx_rate) || 1112 &vf_tx_rate) ||
1107 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), 1113 nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
@@ -1208,6 +1214,10 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
1208 .len = sizeof(struct ifla_vf_tx_rate) }, 1214 .len = sizeof(struct ifla_vf_tx_rate) },
1209 [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY, 1215 [IFLA_VF_SPOOFCHK] = { .type = NLA_BINARY,
1210 .len = sizeof(struct ifla_vf_spoofchk) }, 1216 .len = sizeof(struct ifla_vf_spoofchk) },
1217 [IFLA_VF_RATE] = { .type = NLA_BINARY,
1218 .len = sizeof(struct ifla_vf_rate) },
1219 [IFLA_VF_LINK_STATE] = { .type = NLA_BINARY,
1220 .len = sizeof(struct ifla_vf_link_state) },
1211}; 1221};
1212 1222
1213static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { 1223static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1234,6 +1244,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1234 struct nlattr *tb[IFLA_MAX+1]; 1244 struct nlattr *tb[IFLA_MAX+1];
1235 u32 ext_filter_mask = 0; 1245 u32 ext_filter_mask = 0;
1236 int err; 1246 int err;
1247 int hdrlen;
1237 1248
1238 s_h = cb->args[0]; 1249 s_h = cb->args[0];
1239 s_idx = cb->args[1]; 1250 s_idx = cb->args[1];
@@ -1241,8 +1252,17 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
1241 rcu_read_lock(); 1252 rcu_read_lock();
1242 cb->seq = net->dev_base_seq; 1253 cb->seq = net->dev_base_seq;
1243 1254
1244 if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, 1255 /* A hack to preserve kernel<->userspace interface.
1245 ifla_policy) >= 0) { 1256 * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
1257 * However, before Linux v3.9 the code here assumed rtgenmsg and that's
1258 * what iproute2 < v3.9.0 used.
1259 * We can detect the old iproute2. Even including the IFLA_EXT_MASK
1260 * attribute, its netlink message is shorter than struct ifinfomsg.
1261 */
1262 hdrlen = nlmsg_len(cb->nlh) < sizeof(struct ifinfomsg) ?
1263 sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
1264
1265 if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
1246 1266
1247 if (tb[IFLA_EXT_MASK]) 1267 if (tb[IFLA_EXT_MASK])
1248 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 1268 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
@@ -1367,11 +1387,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
1367 } 1387 }
1368 case IFLA_VF_TX_RATE: { 1388 case IFLA_VF_TX_RATE: {
1369 struct ifla_vf_tx_rate *ivt; 1389 struct ifla_vf_tx_rate *ivt;
1390 struct ifla_vf_info ivf;
1370 ivt = nla_data(vf); 1391 ivt = nla_data(vf);
1371 err = -EOPNOTSUPP; 1392 err = -EOPNOTSUPP;
1372 if (ops->ndo_set_vf_tx_rate) 1393 if (ops->ndo_get_vf_config)
1373 err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, 1394 err = ops->ndo_get_vf_config(dev, ivt->vf,
1374 ivt->rate); 1395 &ivf);
1396 if (err)
1397 break;
1398 err = -EOPNOTSUPP;
1399 if (ops->ndo_set_vf_rate)
1400 err = ops->ndo_set_vf_rate(dev, ivt->vf,
1401 ivf.min_tx_rate,
1402 ivt->rate);
1403 break;
1404 }
1405 case IFLA_VF_RATE: {
1406 struct ifla_vf_rate *ivt;
1407 ivt = nla_data(vf);
1408 err = -EOPNOTSUPP;
1409 if (ops->ndo_set_vf_rate)
1410 err = ops->ndo_set_vf_rate(dev, ivt->vf,
1411 ivt->min_tx_rate,
1412 ivt->max_tx_rate);
1375 break; 1413 break;
1376 } 1414 }
1377 case IFLA_VF_SPOOFCHK: { 1415 case IFLA_VF_SPOOFCHK: {
@@ -1744,7 +1782,6 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
1744 1782
1745 ops->dellink(dev, &list_kill); 1783 ops->dellink(dev, &list_kill);
1746 unregister_netdevice_many(&list_kill); 1784 unregister_netdevice_many(&list_kill);
1747 list_del(&list_kill);
1748 return 0; 1785 return 0;
1749} 1786}
1750 1787
@@ -2019,11 +2056,15 @@ replay:
2019 if (ops->newlink) { 2056 if (ops->newlink) {
2020 err = ops->newlink(net, dev, tb, data); 2057 err = ops->newlink(net, dev, tb, data);
2021 /* Drivers should call free_netdev() in ->destructor 2058 /* Drivers should call free_netdev() in ->destructor
2022 * and unregister it on failure so that device could be 2059 * and unregister it on failure after registration
2023 * finally freed in rtnl_unlock. 2060 * so that device could be finally freed in rtnl_unlock.
2024 */ 2061 */
2025 if (err < 0) 2062 if (err < 0) {
2063 /* If device is not registered at all, free it now */
2064 if (dev->reg_state == NETREG_UNINITIALIZED)
2065 free_netdev(dev);
2026 goto out; 2066 goto out;
2067 }
2027 } else { 2068 } else {
2028 err = register_netdevice(dev); 2069 err = register_netdevice(dev);
2029 if (err < 0) { 2070 if (err < 0) {
@@ -2095,9 +2136,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
2095 struct nlattr *tb[IFLA_MAX+1]; 2136 struct nlattr *tb[IFLA_MAX+1];
2096 u32 ext_filter_mask = 0; 2137 u32 ext_filter_mask = 0;
2097 u16 min_ifinfo_dump_size = 0; 2138 u16 min_ifinfo_dump_size = 0;
2139 int hdrlen;
2140
2141 /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
2142 hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
2143 sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);
2098 2144
2099 if (nlmsg_parse(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, 2145 if (nlmsg_parse(nlh, hdrlen, tb, IFLA_MAX, ifla_policy) >= 0) {
2100 ifla_policy) >= 0) {
2101 if (tb[IFLA_EXT_MASK]) 2146 if (tb[IFLA_EXT_MASK])
2102 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 2147 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);
2103 } 2148 }
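On the driver side, the IFLA_VF_RATE plumbing above pairs with a new ndo_set_vf_rate() callback that takes both a minimum and a maximum rate, replacing ndo_set_vf_tx_rate(); the old IFLA_VF_TX_RATE path keeps working by first reading the current minimum via ndo_get_vf_config(), as the hunk shows. A hedged sketch of a driver hook (only the signature is taken from this series, the body is illustrative):

	#include <linux/errno.h>
	#include <linux/netdevice.h>

	static int example_ndo_set_vf_rate(struct net_device *dev, int vf,
					   int min_tx_rate, int max_tx_rate)
	{
		if (min_tx_rate < 0 || (max_tx_rate && max_tx_rate < min_tx_rate))
			return -EINVAL;

		/* Program the VF scheduler with both bounds here. */
		return 0;
	}

	static const struct net_device_ops example_netdev_ops = {
		.ndo_set_vf_rate	= example_ndo_set_vf_rate,
	};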
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 897da56f3aff..ba71212f0251 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -85,31 +85,6 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
85#endif 85#endif
86 86
87#ifdef CONFIG_INET 87#ifdef CONFIG_INET
88__u32 secure_ip_id(__be32 daddr)
89{
90 u32 hash[MD5_DIGEST_WORDS];
91
92 net_secret_init();
93 hash[0] = (__force __u32) daddr;
94 hash[1] = net_secret[13];
95 hash[2] = net_secret[14];
96 hash[3] = net_secret[15];
97
98 md5_transform(hash, net_secret);
99
100 return hash[0];
101}
102
103__u32 secure_ipv6_id(const __be32 daddr[4])
104{
105 __u32 hash[4];
106
107 net_secret_init();
108 memcpy(hash, daddr, 16);
109 md5_transform(hash, net_secret);
110
111 return hash[0];
112}
113 88
114__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, 89__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
115 __be16 sport, __be16 dport) 90 __be16 sport, __be16 dport)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8383b2bddeb9..bf92824af3f7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -694,7 +694,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
694#endif 694#endif
695 memcpy(new->cb, old->cb, sizeof(old->cb)); 695 memcpy(new->cb, old->cb, sizeof(old->cb));
696 new->csum = old->csum; 696 new->csum = old->csum;
697 new->local_df = old->local_df; 697 new->ignore_df = old->ignore_df;
698 new->pkt_type = old->pkt_type; 698 new->pkt_type = old->pkt_type;
699 new->ip_summed = old->ip_summed; 699 new->ip_summed = old->ip_summed;
700 skb_copy_queue_mapping(new, old); 700 skb_copy_queue_mapping(new, old);
@@ -951,10 +951,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
951EXPORT_SYMBOL(skb_copy); 951EXPORT_SYMBOL(skb_copy);
952 952
953/** 953/**
954 * __pskb_copy - create copy of an sk_buff with private head. 954 * __pskb_copy_fclone - create copy of an sk_buff with private head.
955 * @skb: buffer to copy 955 * @skb: buffer to copy
956 * @headroom: headroom of new skb 956 * @headroom: headroom of new skb
957 * @gfp_mask: allocation priority 957 * @gfp_mask: allocation priority
958 * @fclone: if true allocate the copy of the skb from the fclone
959 * cache instead of the head cache; it is recommended to set this
960 * to true for the cases where the copy will likely be cloned
958 * 961 *
959 * Make a copy of both an &sk_buff and part of its data, located 962 * Make a copy of both an &sk_buff and part of its data, located
960 * in header. Fragmented data remain shared. This is used when 963 * in header. Fragmented data remain shared. This is used when
@@ -964,11 +967,12 @@ EXPORT_SYMBOL(skb_copy);
964 * The returned buffer has a reference count of 1. 967 * The returned buffer has a reference count of 1.
965 */ 968 */
966 969
967struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) 970struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
971 gfp_t gfp_mask, bool fclone)
968{ 972{
969 unsigned int size = skb_headlen(skb) + headroom; 973 unsigned int size = skb_headlen(skb) + headroom;
970 struct sk_buff *n = __alloc_skb(size, gfp_mask, 974 int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
971 skb_alloc_rx_flag(skb), NUMA_NO_NODE); 975 struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
972 976
973 if (!n) 977 if (!n)
974 goto out; 978 goto out;
@@ -1008,7 +1012,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
1008out: 1012out:
1009 return n; 1013 return n;
1010} 1014}
1011EXPORT_SYMBOL(__pskb_copy); 1015EXPORT_SYMBOL(__pskb_copy_fclone);
1012 1016
1013/** 1017/**
1014 * pskb_expand_head - reallocate header of &sk_buff 1018 * pskb_expand_head - reallocate header of &sk_buff
@@ -2881,12 +2885,14 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
2881 int pos; 2885 int pos;
2882 int dummy; 2886 int dummy;
2883 2887
2888 __skb_push(head_skb, doffset);
2884 proto = skb_network_protocol(head_skb, &dummy); 2889 proto = skb_network_protocol(head_skb, &dummy);
2885 if (unlikely(!proto)) 2890 if (unlikely(!proto))
2886 return ERR_PTR(-EINVAL); 2891 return ERR_PTR(-EINVAL);
2887 2892
2888 csum = !!can_checksum_protocol(features, proto); 2893 csum = !head_skb->encap_hdr_csum &&
2889 __skb_push(head_skb, doffset); 2894 !!can_checksum_protocol(features, proto);
2895
2890 headroom = skb_headroom(head_skb); 2896 headroom = skb_headroom(head_skb);
2891 pos = skb_headlen(head_skb); 2897 pos = skb_headlen(head_skb);
2892 2898
@@ -2983,6 +2989,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 			nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
 							    skb_put(nskb, len),
 							    len, 0);
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + offset;
 			continue;
 		}
 
@@ -3052,6 +3060,8 @@ perform_csum_check:
 			nskb->csum = skb_checksum(nskb, doffset,
 						  nskb->len - doffset, 0);
 			nskb->ip_summed = CHECKSUM_NONE;
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + doffset;
 		}
 	} while ((offset += len) < head_skb->len);
 
@@ -3913,7 +3923,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->skb_iif = 0;
-	skb->local_df = 0;
+	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
 	secpath_reset(skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 664ee4295b6f..026e01f70274 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -784,7 +784,7 @@ set_rcvbuf:
 		break;
 
 	case SO_NO_CHECK:
-		sk->sk_no_check = valbool;
+		sk->sk_no_check_tx = valbool;
 		break;
 
 	case SO_PRIORITY:
@@ -1064,7 +1064,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		break;
 
 	case SO_NO_CHECK:
-		v.val = sk->sk_no_check;
+		v.val = sk->sk_no_check_tx;
 		break;
 
 	case SO_PRIORITY:
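
For context, SO_NO_CHECK now only reflects the transmit side of the socket (sk_no_check_tx). A minimal userspace sketch of toggling it on a UDP socket follows, assuming a Linux toolchain where SO_NO_CHECK is visible through <sys/socket.h>; error handling is omitted.

/* Userspace sketch (assumption: SO_NO_CHECK exposed via <sys/socket.h>):
 * disable UDP checksum generation on transmit for this socket.
 */
#include <sys/socket.h>

static int disable_udp_tx_csum(int fd)
{
	int one = 1;

	return setsockopt(fd, SOL_SOCKET, SO_NO_CHECK, &one, sizeof(one));
}
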
diff --git a/net/core/tso.c b/net/core/tso.c
new file mode 100644
index 000000000000..8c3203c585b0
--- /dev/null
+++ b/net/core/tso.c
@@ -0,0 +1,77 @@
+#include <linux/export.h>
+#include <net/ip.h>
+#include <net/tso.h>
+
+/* Calculate expected number of TX descriptors */
+int tso_count_descs(struct sk_buff *skb)
+{
+	/* The Marvell Way */
+	return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+}
+EXPORT_SYMBOL(tso_count_descs);
+
+void tso_build_hdr(struct sk_buff *skb, char *hdr, struct tso_t *tso,
+		   int size, bool is_last)
+{
+	struct iphdr *iph;
+	struct tcphdr *tcph;
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+	int mac_hdr_len = skb_network_offset(skb);
+
+	memcpy(hdr, skb->data, hdr_len);
+	iph = (struct iphdr *)(hdr + mac_hdr_len);
+	iph->id = htons(tso->ip_id);
+	iph->tot_len = htons(size + hdr_len - mac_hdr_len);
+	tcph = (struct tcphdr *)(hdr + skb_transport_offset(skb));
+	tcph->seq = htonl(tso->tcp_seq);
+	tso->ip_id++;
+
+	if (!is_last) {
+		/* Clear all special flags for not last packet */
+		tcph->psh = 0;
+		tcph->fin = 0;
+		tcph->rst = 0;
+	}
+}
+EXPORT_SYMBOL(tso_build_hdr);
+
+void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
+{
+	tso->tcp_seq += size;
+	tso->size -= size;
+	tso->data += size;
+
+	if ((tso->size == 0) &&
+	    (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+		/* Move to next segment */
+		tso->size = frag->size;
+		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->next_frag_idx++;
+	}
+}
+EXPORT_SYMBOL(tso_build_data);
+
+void tso_start(struct sk_buff *skb, struct tso_t *tso)
+{
+	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+	tso->ip_id = ntohs(ip_hdr(skb)->id);
+	tso->tcp_seq = ntohl(tcp_hdr(skb)->seq);
+	tso->next_frag_idx = 0;
+
+	/* Build first data */
+	tso->size = skb_headlen(skb) - hdr_len;
+	tso->data = skb->data + hdr_len;
+	if ((tso->size == 0) &&
+	    (tso->next_frag_idx < skb_shinfo(skb)->nr_frags)) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
+
+		/* Move to next segment */
+		tso->size = frag->size;
+		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->next_frag_idx++;
+	}
+}
+EXPORT_SYMBOL(tso_start);
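
These helpers are meant to be driven from a NIC driver's xmit path, as in the mvneta and mv643xx_eth conversions mentioned in the pull request. A rough, hedged sketch of such a loop follows; xmit_tso(), fill_hdr_desc() and fill_data_desc() are hypothetical driver-side names, and the on-stack header buffer stands in for whatever per-ring header area a real driver would use.

/* Hedged sketch of a software-TSO transmit loop built on net/core/tso.c.
 * fill_hdr_desc() and fill_data_desc() are hypothetical driver hooks,
 * declared here only so the sketch is self-contained.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/tcp.h>
#include <net/tso.h>

void fill_hdr_desc(struct net_device *dev, char *hdr, int hdr_len);
void fill_data_desc(struct net_device *dev, void *data, int len);

static void xmit_tso(struct sk_buff *skb, struct net_device *dev)
{
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
	int seg_size = skb_shinfo(skb)->gso_size;
	int total_len = skb->len - hdr_len;
	struct tso_t tso;

	/* A real driver would first compare tso_count_descs(skb) with the
	 * free TX descriptors and stop the queue if there are too few. */
	tso_start(skb, &tso);

	while (total_len > 0) {
		char hdr[256];	/* simplified per-segment header buffer */
		int size = min(seg_size, total_len);
		bool last = (size == total_len);

		total_len -= size;

		/* Per-segment header: IP id/tot_len and the TCP sequence are
		 * fixed up; FIN/PSH/RST survive only on the last segment. */
		tso_build_hdr(skb, hdr, &tso, size, last);
		fill_hdr_desc(dev, hdr, hdr_len);		/* hypothetical */

		/* Payload may span the linear area and several frags. */
		while (size > 0) {
			int chunk = min_t(int, size, tso.size);

			fill_data_desc(dev, tso.data, chunk);	/* hypothetical */
			tso_build_data(skb, &tso, chunk);
			size -= chunk;
		}
	}
}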