aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-01-07 21:26:31 -0500
committerDavid S. Miller <davem@davemloft.net>2018-01-07 21:26:31 -0500
commit7f0b800048b562d716372466ea8d9de648c422dd (patch)
tree8fbad920adc333fd00cbc3acaba09cdfa9b63fb3
parentd0adb51edb73c94a595bfa9d9bd8b35977e74fbf (diff)
parent9be99badee761f0b2c065ecbd8bd54a96cbd0fa0 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2018-01-07 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Add a start of a framework for extending struct xdp_buff without having the overhead of populating every data at runtime. Idea is to have a new per-queue struct xdp_rxq_info that holds read mostly data (currently that is, queue number and a pointer to the corresponding netdev) which is set up during rxqueue config time. When an XDP program is invoked, struct xdp_buff holds a pointer to struct xdp_rxq_info that the BPF program can then walk. The user facing BPF program that uses struct xdp_md for context can use these members directly, and the verifier rewrites context access transparently by walking the xdp_rxq_info and net_device pointers to load the data, from Jesper. 2) Redo the reporting of offload device information to user space such that it works in combination with network namespaces. The latter is reported through a device/inode tuple as similarly done in other subsystems as well (e.g. perf) in order to identify the namespace. For this to work, ns_get_path() has been generalized such that the namespace can be retrieved not only from a specific task (perf case), but also from a callback where we deduce the netns (ns_common) from a netdevice. bpftool support using the new uapi info and extensive test cases for test_offload.py in BPF selftests have been added as well, from Jakub. 3) Add two bpftool improvements: i) properly report the bpftool version such that it corresponds to the version from the kernel source tree. So pick the right linux/version.h from the source tree instead of the installed one. ii) fix bpftool and also bpf_jit_disasm build with binutils >= 2.29. The reason for the build breakage is that binutils library changed the function signature to select the disassembler. 
Given this is needed in multiple tools, add a proper feature detection to the tools/build/features infrastructure, from Roman. 4) Implement the BPF syscall command BPF_MAP_GET_NEXT_KEY for the stacktrace map. It is currently unimplemented, but there are use cases where user space needs to walk all stacktrace map entries e.g. for dumping or deleting map entries w/o having to close and recreate the map. Add BPF selftests along with it, from Yonghong. 5) A few follow-up cleanups for the bpftool cgroup code: i) rename the cgroup 'list' command into 'show' as we have it for other subcommands as well, ii) then alias the 'show' command such that 'list' is accepted which is also common practice in iproute2, and iii) remove a couple of newlines from error messages using p_err(), from Jakub. 6) Two follow-up cleanups to sockmap code: i) remove the unused bpf_compute_data_end_sk_skb() function and ii) only build the sockmap infrastructure when CONFIG_INET is enabled since it's only aware of TCP sockets at this time, from John. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c1
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c11
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.c4
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_queues.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h3
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h5
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c10
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_fp.c1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c10
-rw-r--r--drivers/net/netdevsim/bpf.c2
-rw-r--r--drivers/net/tun.c24
-rw-r--r--drivers/net/virtio_net.c14
-rw-r--r--fs/nsfs.c29
-rw-r--r--include/linux/bpf.h18
-rw-r--r--include/linux/bpf_types.h2
-rw-r--r--include/linux/bpf_verifier.h16
-rw-r--r--include/linux/filter.h2
-rw-r--r--include/linux/netdevice.h6
-rw-r--r--include/linux/proc_ns.h3
-rw-r--r--include/net/xdp.h48
-rw-r--r--include/uapi/linux/bpf.h6
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/offload.c147
-rw-r--r--kernel/bpf/sockmap.c8
-rw-r--r--kernel/bpf/stackmap.c28
-rw-r--r--kernel/bpf/syscall.c19
-rw-r--r--kernel/bpf/verifier.c20
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c69
-rw-r--r--net/core/filter.c19
-rw-r--r--net/core/xdp.c73
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/xdp_rxq_info_kern.c96
-rw-r--r--samples/bpf/xdp_rxq_info_user.c531
-rw-r--r--tools/bpf/Makefile29
-rw-r--r--tools/bpf/bpf_jit_disasm.c7
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst6
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst6
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst6
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst6
-rw-r--r--tools/bpf/bpftool/Makefile27
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool8
-rw-r--r--tools/bpf/bpftool/cgroup.c35
-rw-r--r--tools/bpf/bpftool/common.c52
-rw-r--r--tools/bpf/bpftool/jit_disasm.c7
-rw-r--r--tools/bpf/bpftool/main.c13
-rw-r--r--tools/bpf/bpftool/main.h2
-rw-r--r--tools/bpf/bpftool/map.c3
-rw-r--r--tools/bpf/bpftool/prog.c10
-rw-r--r--tools/build/feature/Makefile4
-rw-r--r--tools/build/feature/test-disassembler-four-args.c15
-rw-r--r--tools/include/uapi/linux/bpf.h3
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py112
-rw-r--r--tools/testing/selftests/bpf/test_progs.c127
-rw-r--r--tools/testing/selftests/bpf/test_stacktrace_map.c62
72 files changed, 1687 insertions, 178 deletions
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9efbdc6f1fcb..89c3c8760a78 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2247,6 +2247,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
2247 if (rxr->xdp_prog) 2247 if (rxr->xdp_prog)
2248 bpf_prog_put(rxr->xdp_prog); 2248 bpf_prog_put(rxr->xdp_prog);
2249 2249
2250 if (xdp_rxq_info_is_reg(&rxr->xdp_rxq))
2251 xdp_rxq_info_unreg(&rxr->xdp_rxq);
2252
2250 kfree(rxr->rx_tpa); 2253 kfree(rxr->rx_tpa);
2251 rxr->rx_tpa = NULL; 2254 rxr->rx_tpa = NULL;
2252 2255
@@ -2280,6 +2283,10 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
2280 2283
2281 ring = &rxr->rx_ring_struct; 2284 ring = &rxr->rx_ring_struct;
2282 2285
2286 rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i);
2287 if (rc < 0)
2288 return rc;
2289
2283 rc = bnxt_alloc_ring(bp, ring); 2290 rc = bnxt_alloc_ring(bp, ring);
2284 if (rc) 2291 if (rc)
2285 return rc; 2292 return rc;
@@ -2834,6 +2841,9 @@ void bnxt_set_ring_params(struct bnxt *bp)
2834 bp->cp_ring_mask = bp->cp_bit - 1; 2841 bp->cp_ring_mask = bp->cp_bit - 1;
2835} 2842}
2836 2843
2844/* Changing allocation mode of RX rings.
2845 * TODO: Update when extending xdp_rxq_info to support allocation modes.
2846 */
2837int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode) 2847int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
2838{ 2848{
2839 if (page_mode) { 2849 if (page_mode) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5359a1f0045f..2d268fc26f5e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -23,6 +23,7 @@
23#include <net/devlink.h> 23#include <net/devlink.h>
24#include <net/dst_metadata.h> 24#include <net/dst_metadata.h>
25#include <net/switchdev.h> 25#include <net/switchdev.h>
26#include <net/xdp.h>
26 27
27struct tx_bd { 28struct tx_bd {
28 __le32 tx_bd_len_flags_type; 29 __le32 tx_bd_len_flags_type;
@@ -664,6 +665,7 @@ struct bnxt_rx_ring_info {
664 665
665 struct bnxt_ring_struct rx_ring_struct; 666 struct bnxt_ring_struct rx_ring_struct;
666 struct bnxt_ring_struct rx_agg_ring_struct; 667 struct bnxt_ring_struct rx_agg_ring_struct;
668 struct xdp_rxq_info xdp_rxq;
667}; 669};
668 670
669struct bnxt_cp_ring_info { 671struct bnxt_cp_ring_info {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 261e5847557a..1389ab5e05df 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -96,6 +96,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
96 xdp.data = *data_ptr; 96 xdp.data = *data_ptr;
97 xdp_set_data_meta_invalid(&xdp); 97 xdp_set_data_meta_invalid(&xdp);
98 xdp.data_end = *data_ptr + *len; 98 xdp.data_end = *data_ptr + *len;
99 xdp.rxq = &rxr->xdp_rxq;
99 orig_data = xdp.data; 100 orig_data = xdp.data;
100 mapping = rx_buf->mapping - bp->rx_dma_offset; 101 mapping = rx_buf->mapping - bp->rx_dma_offset;
101 102
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 52b3a6044f85..21618d0d694f 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -521,7 +521,7 @@ static void nicvf_unmap_page(struct nicvf *nic, struct page *page, u64 dma_addr)
521 521
522static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, 522static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
523 struct cqe_rx_t *cqe_rx, struct snd_queue *sq, 523 struct cqe_rx_t *cqe_rx, struct snd_queue *sq,
524 struct sk_buff **skb) 524 struct rcv_queue *rq, struct sk_buff **skb)
525{ 525{
526 struct xdp_buff xdp; 526 struct xdp_buff xdp;
527 struct page *page; 527 struct page *page;
@@ -545,6 +545,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
545 xdp.data = (void *)cpu_addr; 545 xdp.data = (void *)cpu_addr;
546 xdp_set_data_meta_invalid(&xdp); 546 xdp_set_data_meta_invalid(&xdp);
547 xdp.data_end = xdp.data + len; 547 xdp.data_end = xdp.data + len;
548 xdp.rxq = &rq->xdp_rxq;
548 orig_data = xdp.data; 549 orig_data = xdp.data;
549 550
550 rcu_read_lock(); 551 rcu_read_lock();
@@ -698,7 +699,8 @@ static inline void nicvf_set_rxhash(struct net_device *netdev,
698 699
699static void nicvf_rcv_pkt_handler(struct net_device *netdev, 700static void nicvf_rcv_pkt_handler(struct net_device *netdev,
700 struct napi_struct *napi, 701 struct napi_struct *napi,
701 struct cqe_rx_t *cqe_rx, struct snd_queue *sq) 702 struct cqe_rx_t *cqe_rx,
703 struct snd_queue *sq, struct rcv_queue *rq)
702{ 704{
703 struct sk_buff *skb = NULL; 705 struct sk_buff *skb = NULL;
704 struct nicvf *nic = netdev_priv(netdev); 706 struct nicvf *nic = netdev_priv(netdev);
@@ -724,7 +726,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev,
724 /* For XDP, ignore pkts spanning multiple pages */ 726 /* For XDP, ignore pkts spanning multiple pages */
725 if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) { 727 if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) {
726 /* Packet consumed by XDP */ 728 /* Packet consumed by XDP */
727 if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb)) 729 if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, rq, &skb))
728 return; 730 return;
729 } else { 731 } else {
730 skb = nicvf_get_rcv_skb(snic, cqe_rx, 732 skb = nicvf_get_rcv_skb(snic, cqe_rx,
@@ -781,6 +783,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
781 struct cqe_rx_t *cq_desc; 783 struct cqe_rx_t *cq_desc;
782 struct netdev_queue *txq; 784 struct netdev_queue *txq;
783 struct snd_queue *sq = &qs->sq[cq_idx]; 785 struct snd_queue *sq = &qs->sq[cq_idx];
786 struct rcv_queue *rq = &qs->rq[cq_idx];
784 unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; 787 unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx;
785 788
786 spin_lock_bh(&cq->lock); 789 spin_lock_bh(&cq->lock);
@@ -811,7 +814,7 @@ loop:
811 814
812 switch (cq_desc->cqe_type) { 815 switch (cq_desc->cqe_type) {
813 case CQE_TYPE_RX: 816 case CQE_TYPE_RX:
814 nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq); 817 nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq, rq);
815 work_done++; 818 work_done++;
816 break; 819 break;
817 case CQE_TYPE_SEND: 820 case CQE_TYPE_SEND:
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index f38ea349aa00..14e62c6ac342 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -760,6 +760,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
760 760
761 if (!rq->enable) { 761 if (!rq->enable) {
762 nicvf_reclaim_rcv_queue(nic, qs, qidx); 762 nicvf_reclaim_rcv_queue(nic, qs, qidx);
763 xdp_rxq_info_unreg(&rq->xdp_rxq);
763 return; 764 return;
764 } 765 }
765 766
@@ -772,6 +773,9 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
772 /* all writes of RBDR data to be loaded into L2 Cache as well*/ 773 /* all writes of RBDR data to be loaded into L2 Cache as well*/
773 rq->caching = 1; 774 rq->caching = 1;
774 775
776 /* Driver has no proper error path for failed XDP RX-queue info reg */
777 WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
778
775 /* Send a mailbox msg to PF to config RQ */ 779 /* Send a mailbox msg to PF to config RQ */
776 mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG; 780 mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
777 mbx.rq.qs_num = qs->vnic_id; 781 mbx.rq.qs_num = qs->vnic_id;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 178ab6e8e3c5..7d1e4e2aaad0 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -12,6 +12,7 @@
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/iommu.h> 13#include <linux/iommu.h>
14#include <linux/bpf.h> 14#include <linux/bpf.h>
15#include <net/xdp.h>
15#include "q_struct.h" 16#include "q_struct.h"
16 17
17#define MAX_QUEUE_SET 128 18#define MAX_QUEUE_SET 128
@@ -255,6 +256,7 @@ struct rcv_queue {
255 u8 start_qs_rbdr_idx; /* RBDR idx in the above QS */ 256 u8 start_qs_rbdr_idx; /* RBDR idx in the above QS */
256 u8 caching; 257 u8 caching;
257 struct rx_tx_queue_stats stats; 258 struct rx_tx_queue_stats stats;
259 struct xdp_rxq_info xdp_rxq;
258} ____cacheline_aligned_in_smp; 260} ____cacheline_aligned_in_smp;
259 261
260struct cmp_queue { 262struct cmp_queue {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 5f6cf7212d4f..cfd788b4fd7a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1585,6 +1585,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
1585 */ 1585 */
1586 rx_rings[i].desc = NULL; 1586 rx_rings[i].desc = NULL;
1587 rx_rings[i].rx_bi = NULL; 1587 rx_rings[i].rx_bi = NULL;
1588 /* Clear cloned XDP RX-queue info before setup call */
1589 memset(&rx_rings[i].xdp_rxq, 0, sizeof(rx_rings[i].xdp_rxq));
1588 /* this is to allow wr32 to have something to write to 1590 /* this is to allow wr32 to have something to write to
1589 * during early allocation of Rx buffers 1591 * during early allocation of Rx buffers
1590 */ 1592 */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 4566d66ffc7c..2a8a85e3ae8f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -27,6 +27,7 @@
27#include <linux/prefetch.h> 27#include <linux/prefetch.h>
28#include <net/busy_poll.h> 28#include <net/busy_poll.h>
29#include <linux/bpf_trace.h> 29#include <linux/bpf_trace.h>
30#include <net/xdp.h>
30#include "i40e.h" 31#include "i40e.h"
31#include "i40e_trace.h" 32#include "i40e_trace.h"
32#include "i40e_prototype.h" 33#include "i40e_prototype.h"
@@ -1236,6 +1237,8 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1236void i40e_free_rx_resources(struct i40e_ring *rx_ring) 1237void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1237{ 1238{
1238 i40e_clean_rx_ring(rx_ring); 1239 i40e_clean_rx_ring(rx_ring);
1240 if (rx_ring->vsi->type == I40E_VSI_MAIN)
1241 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
1239 rx_ring->xdp_prog = NULL; 1242 rx_ring->xdp_prog = NULL;
1240 kfree(rx_ring->rx_bi); 1243 kfree(rx_ring->rx_bi);
1241 rx_ring->rx_bi = NULL; 1244 rx_ring->rx_bi = NULL;
@@ -1256,6 +1259,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1256int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring) 1259int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1257{ 1260{
1258 struct device *dev = rx_ring->dev; 1261 struct device *dev = rx_ring->dev;
1262 int err = -ENOMEM;
1259 int bi_size; 1263 int bi_size;
1260 1264
1261 /* warn if we are about to overwrite the pointer */ 1265 /* warn if we are about to overwrite the pointer */
@@ -1283,13 +1287,21 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1283 rx_ring->next_to_clean = 0; 1287 rx_ring->next_to_clean = 0;
1284 rx_ring->next_to_use = 0; 1288 rx_ring->next_to_use = 0;
1285 1289
1290 /* XDP RX-queue info only needed for RX rings exposed to XDP */
1291 if (rx_ring->vsi->type == I40E_VSI_MAIN) {
1292 err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
1293 rx_ring->queue_index);
1294 if (err < 0)
1295 goto err;
1296 }
1297
1286 rx_ring->xdp_prog = rx_ring->vsi->xdp_prog; 1298 rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
1287 1299
1288 return 0; 1300 return 0;
1289err: 1301err:
1290 kfree(rx_ring->rx_bi); 1302 kfree(rx_ring->rx_bi);
1291 rx_ring->rx_bi = NULL; 1303 rx_ring->rx_bi = NULL;
1292 return -ENOMEM; 1304 return err;
1293} 1305}
1294 1306
1295/** 1307/**
@@ -2068,11 +2080,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
2068 struct sk_buff *skb = rx_ring->skb; 2080 struct sk_buff *skb = rx_ring->skb;
2069 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); 2081 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
2070 bool failure = false, xdp_xmit = false; 2082 bool failure = false, xdp_xmit = false;
2083 struct xdp_buff xdp;
2084
2085 xdp.rxq = &rx_ring->xdp_rxq;
2071 2086
2072 while (likely(total_rx_packets < (unsigned int)budget)) { 2087 while (likely(total_rx_packets < (unsigned int)budget)) {
2073 struct i40e_rx_buffer *rx_buffer; 2088 struct i40e_rx_buffer *rx_buffer;
2074 union i40e_rx_desc *rx_desc; 2089 union i40e_rx_desc *rx_desc;
2075 struct xdp_buff xdp;
2076 unsigned int size; 2090 unsigned int size;
2077 u16 vlan_tag; 2091 u16 vlan_tag;
2078 u8 rx_ptype; 2092 u8 rx_ptype;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index fbae1182e2ea..2d08760fc4ce 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -27,6 +27,8 @@
27#ifndef _I40E_TXRX_H_ 27#ifndef _I40E_TXRX_H_
28#define _I40E_TXRX_H_ 28#define _I40E_TXRX_H_
29 29
30#include <net/xdp.h>
31
30/* Interrupt Throttling and Rate Limiting Goodies */ 32/* Interrupt Throttling and Rate Limiting Goodies */
31 33
32#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */ 34#define I40E_MAX_ITR 0x0FF0 /* reg uses 2 usec resolution */
@@ -428,6 +430,7 @@ struct i40e_ring {
428 */ 430 */
429 431
430 struct i40e_channel *ch; 432 struct i40e_channel *ch;
433 struct xdp_rxq_info xdp_rxq;
431} ____cacheline_internodealigned_in_smp; 434} ____cacheline_internodealigned_in_smp;
432 435
433static inline bool ring_uses_build_skb(struct i40e_ring *ring) 436static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 468c3555a629..8611763d6129 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -53,6 +53,7 @@
53#include <linux/dca.h> 53#include <linux/dca.h>
54#endif 54#endif
55 55
56#include <net/xdp.h>
56#include <net/busy_poll.h> 57#include <net/busy_poll.h>
57 58
58/* common prefix used by pr_<> macros */ 59/* common prefix used by pr_<> macros */
@@ -371,6 +372,7 @@ struct ixgbe_ring {
371 struct ixgbe_tx_queue_stats tx_stats; 372 struct ixgbe_tx_queue_stats tx_stats;
372 struct ixgbe_rx_queue_stats rx_stats; 373 struct ixgbe_rx_queue_stats rx_stats;
373 }; 374 };
375 struct xdp_rxq_info xdp_rxq;
374} ____cacheline_internodealigned_in_smp; 376} ____cacheline_internodealigned_in_smp;
375 377
376enum ixgbe_ring_f_enum { 378enum ixgbe_ring_f_enum {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 0aad1c2a3667..0aaf70b3cfcd 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1156,6 +1156,10 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
1156 memcpy(&temp_ring[i], adapter->rx_ring[i], 1156 memcpy(&temp_ring[i], adapter->rx_ring[i],
1157 sizeof(struct ixgbe_ring)); 1157 sizeof(struct ixgbe_ring));
1158 1158
1159 /* Clear copied XDP RX-queue info */
1160 memset(&temp_ring[i].xdp_rxq, 0,
1161 sizeof(temp_ring[i].xdp_rxq));
1162
1159 temp_ring[i].count = new_rx_count; 1163 temp_ring[i].count = new_rx_count;
1160 err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]); 1164 err = ixgbe_setup_rx_resources(adapter, &temp_ring[i]);
1161 if (err) { 1165 if (err) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 7737a05c717c..95aba975b391 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2318,12 +2318,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
2318#endif /* IXGBE_FCOE */ 2318#endif /* IXGBE_FCOE */
2319 u16 cleaned_count = ixgbe_desc_unused(rx_ring); 2319 u16 cleaned_count = ixgbe_desc_unused(rx_ring);
2320 bool xdp_xmit = false; 2320 bool xdp_xmit = false;
2321 struct xdp_buff xdp;
2322
2323 xdp.rxq = &rx_ring->xdp_rxq;
2321 2324
2322 while (likely(total_rx_packets < budget)) { 2325 while (likely(total_rx_packets < budget)) {
2323 union ixgbe_adv_rx_desc *rx_desc; 2326 union ixgbe_adv_rx_desc *rx_desc;
2324 struct ixgbe_rx_buffer *rx_buffer; 2327 struct ixgbe_rx_buffer *rx_buffer;
2325 struct sk_buff *skb; 2328 struct sk_buff *skb;
2326 struct xdp_buff xdp;
2327 unsigned int size; 2329 unsigned int size;
2328 2330
2329 /* return some buffers to hardware, one at a time is too slow */ 2331 /* return some buffers to hardware, one at a time is too slow */
@@ -6444,6 +6446,11 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
6444 rx_ring->next_to_clean = 0; 6446 rx_ring->next_to_clean = 0;
6445 rx_ring->next_to_use = 0; 6447 rx_ring->next_to_use = 0;
6446 6448
6449 /* XDP RX-queue info */
6450 if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
6451 rx_ring->queue_index) < 0)
6452 goto err;
6453
6447 rx_ring->xdp_prog = adapter->xdp_prog; 6454 rx_ring->xdp_prog = adapter->xdp_prog;
6448 6455
6449 return 0; 6456 return 0;
@@ -6541,6 +6548,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
6541 ixgbe_clean_rx_ring(rx_ring); 6548 ixgbe_clean_rx_ring(rx_ring);
6542 6549
6543 rx_ring->xdp_prog = NULL; 6550 rx_ring->xdp_prog = NULL;
6551 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
6544 vfree(rx_ring->rx_buffer_info); 6552 vfree(rx_ring->rx_buffer_info);
6545 rx_ring->rx_buffer_info = NULL; 6553 rx_ring->rx_buffer_info = NULL;
6546 6554
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 21bc17fa3854..8fc51bc29003 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2172,8 +2172,9 @@ static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
2172 2172
2173 if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], 2173 if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
2174 prof->rx_ring_size, priv->stride, 2174 prof->rx_ring_size, priv->stride,
2175 node)) 2175 node, i))
2176 goto err; 2176 goto err;
2177
2177 } 2178 }
2178 2179
2179#ifdef CONFIG_RFS_ACCEL 2180#ifdef CONFIG_RFS_ACCEL
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 5f9dbc9a7f5b..b4d144e67514 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -262,7 +262,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
262 262
263int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, 263int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
264 struct mlx4_en_rx_ring **pring, 264 struct mlx4_en_rx_ring **pring,
265 u32 size, u16 stride, int node) 265 u32 size, u16 stride, int node, int queue_index)
266{ 266{
267 struct mlx4_en_dev *mdev = priv->mdev; 267 struct mlx4_en_dev *mdev = priv->mdev;
268 struct mlx4_en_rx_ring *ring; 268 struct mlx4_en_rx_ring *ring;
@@ -286,6 +286,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
286 ring->log_stride = ffs(ring->stride) - 1; 286 ring->log_stride = ffs(ring->stride) - 1;
287 ring->buf_size = ring->size * ring->stride + TXBB_SIZE; 287 ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
288 288
289 if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index) < 0)
290 goto err_ring;
291
289 tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * 292 tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
290 sizeof(struct mlx4_en_rx_alloc)); 293 sizeof(struct mlx4_en_rx_alloc));
291 ring->rx_info = vzalloc_node(tmp, node); 294 ring->rx_info = vzalloc_node(tmp, node);
@@ -293,7 +296,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
293 ring->rx_info = vzalloc(tmp); 296 ring->rx_info = vzalloc(tmp);
294 if (!ring->rx_info) { 297 if (!ring->rx_info) {
295 err = -ENOMEM; 298 err = -ENOMEM;
296 goto err_ring; 299 goto err_xdp_info;
297 } 300 }
298 } 301 }
299 302
@@ -317,6 +320,8 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
317err_info: 320err_info:
318 vfree(ring->rx_info); 321 vfree(ring->rx_info);
319 ring->rx_info = NULL; 322 ring->rx_info = NULL;
323err_xdp_info:
324 xdp_rxq_info_unreg(&ring->xdp_rxq);
320err_ring: 325err_ring:
321 kfree(ring); 326 kfree(ring);
322 *pring = NULL; 327 *pring = NULL;
@@ -440,6 +445,7 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
440 lockdep_is_held(&mdev->state_lock)); 445 lockdep_is_held(&mdev->state_lock));
441 if (old_prog) 446 if (old_prog)
442 bpf_prog_put(old_prog); 447 bpf_prog_put(old_prog);
448 xdp_rxq_info_unreg(&ring->xdp_rxq);
443 mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); 449 mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
444 vfree(ring->rx_info); 450 vfree(ring->rx_info);
445 ring->rx_info = NULL; 451 ring->rx_info = NULL;
@@ -652,6 +658,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
652 int cq_ring = cq->ring; 658 int cq_ring = cq->ring;
653 bool doorbell_pending; 659 bool doorbell_pending;
654 struct mlx4_cqe *cqe; 660 struct mlx4_cqe *cqe;
661 struct xdp_buff xdp;
655 int polled = 0; 662 int polled = 0;
656 int index; 663 int index;
657 664
@@ -666,6 +673,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
666 /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ 673 /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
667 rcu_read_lock(); 674 rcu_read_lock();
668 xdp_prog = rcu_dereference(ring->xdp_prog); 675 xdp_prog = rcu_dereference(ring->xdp_prog);
676 xdp.rxq = &ring->xdp_rxq;
669 doorbell_pending = 0; 677 doorbell_pending = 0;
670 678
671 /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx 679 /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
@@ -750,7 +758,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
750 * read bytes but not past the end of the frag. 758 * read bytes but not past the end of the frag.
751 */ 759 */
752 if (xdp_prog) { 760 if (xdp_prog) {
753 struct xdp_buff xdp;
754 dma_addr_t dma; 761 dma_addr_t dma;
755 void *orig_data; 762 void *orig_data;
756 u32 act; 763 u32 act;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 7db3d0d9bfce..f470ae37d937 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -46,6 +46,7 @@
46#endif 46#endif
47#include <linux/cpu_rmap.h> 47#include <linux/cpu_rmap.h>
48#include <linux/ptp_clock_kernel.h> 48#include <linux/ptp_clock_kernel.h>
49#include <net/xdp.h>
49 50
50#include <linux/mlx4/device.h> 51#include <linux/mlx4/device.h>
51#include <linux/mlx4/qp.h> 52#include <linux/mlx4/qp.h>
@@ -356,6 +357,7 @@ struct mlx4_en_rx_ring {
356 unsigned long dropped; 357 unsigned long dropped;
357 int hwtstamp_rx_filter; 358 int hwtstamp_rx_filter;
358 cpumask_var_t affinity_mask; 359 cpumask_var_t affinity_mask;
360 struct xdp_rxq_info xdp_rxq;
359}; 361};
360 362
361struct mlx4_en_cq { 363struct mlx4_en_cq {
@@ -720,7 +722,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev);
720void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv); 722void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv);
721int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, 723int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
722 struct mlx4_en_rx_ring **pring, 724 struct mlx4_en_rx_ring **pring,
723 u32 size, u16 stride, int node); 725 u32 size, u16 stride, int node, int queue_index);
724void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, 726void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
725 struct mlx4_en_rx_ring **pring, 727 struct mlx4_en_rx_ring **pring,
726 u32 size, u16 stride); 728 u32 size, u16 stride);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 543060c305a0..5299310f2481 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -46,6 +46,7 @@
46#include <linux/mlx5/transobj.h> 46#include <linux/mlx5/transobj.h>
47#include <linux/rhashtable.h> 47#include <linux/rhashtable.h>
48#include <net/switchdev.h> 48#include <net/switchdev.h>
49#include <net/xdp.h>
49#include "wq.h" 50#include "wq.h"
50#include "mlx5_core.h" 51#include "mlx5_core.h"
51#include "en_stats.h" 52#include "en_stats.h"
@@ -571,6 +572,9 @@ struct mlx5e_rq {
571 u32 rqn; 572 u32 rqn;
572 struct mlx5_core_dev *mdev; 573 struct mlx5_core_dev *mdev;
573 struct mlx5_core_mkey umr_mkey; 574 struct mlx5_core_mkey umr_mkey;
575
576 /* XDP read-mostly */
577 struct xdp_rxq_info xdp_rxq;
574} ____cacheline_aligned_in_smp; 578} ____cacheline_aligned_in_smp;
575 579
576struct mlx5e_channel { 580struct mlx5e_channel {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3aa1c90e7c86..539bd1d24396 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -582,6 +582,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
582 goto err_rq_wq_destroy; 582 goto err_rq_wq_destroy;
583 } 583 }
584 584
585 if (xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix) < 0)
586 goto err_rq_wq_destroy;
587
585 rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; 588 rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
586 rq->buff.headroom = params->rq_headroom; 589 rq->buff.headroom = params->rq_headroom;
587 590
@@ -687,6 +690,7 @@ err_destroy_umr_mkey:
687err_rq_wq_destroy: 690err_rq_wq_destroy:
688 if (rq->xdp_prog) 691 if (rq->xdp_prog)
689 bpf_prog_put(rq->xdp_prog); 692 bpf_prog_put(rq->xdp_prog);
693 xdp_rxq_info_unreg(&rq->xdp_rxq);
690 mlx5_wq_destroy(&rq->wq_ctrl); 694 mlx5_wq_destroy(&rq->wq_ctrl);
691 695
692 return err; 696 return err;
@@ -699,6 +703,8 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
699 if (rq->xdp_prog) 703 if (rq->xdp_prog)
700 bpf_prog_put(rq->xdp_prog); 704 bpf_prog_put(rq->xdp_prog);
701 705
706 xdp_rxq_info_unreg(&rq->xdp_rxq);
707
702 switch (rq->wq_type) { 708 switch (rq->wq_type) {
703 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: 709 case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
704 mlx5e_rq_free_mpwqe_info(rq); 710 mlx5e_rq_free_mpwqe_info(rq);
@@ -2766,6 +2772,9 @@ static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
2766 if (err) 2772 if (err)
2767 return err; 2773 return err;
2768 2774
2775 /* Mark as unused given "Drop-RQ" packets never reach XDP */
2776 xdp_rxq_info_unused(&rq->xdp_rxq);
2777
2769 rq->mdev = mdev; 2778 rq->mdev = mdev;
2770 2779
2771 return 0; 2780 return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 5b499c7a698f..7b38480811d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -812,6 +812,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
812 xdp_set_data_meta_invalid(&xdp); 812 xdp_set_data_meta_invalid(&xdp);
813 xdp.data_end = xdp.data + *len; 813 xdp.data_end = xdp.data + *len;
814 xdp.data_hard_start = va; 814 xdp.data_hard_start = va;
815 xdp.rxq = &rq->xdp_rxq;
815 816
816 act = bpf_prog_run_xdp(prog, &xdp); 817 act = bpf_prog_run_xdp(prog, &xdp);
817 switch (act) { 818 switch (act) {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index aae1be9ed056..89a9b6393882 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -238,7 +238,7 @@ struct nfp_bpf_vnic {
238 238
239int nfp_bpf_jit(struct nfp_prog *prog); 239int nfp_bpf_jit(struct nfp_prog *prog);
240 240
241extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops; 241extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops;
242 242
243struct netdev_bpf; 243struct netdev_bpf;
244struct nfp_app; 244struct nfp_app;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 9c2608445bd8..d8870c2f11f3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -260,6 +260,6 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
260 return 0; 260 return 0;
261} 261}
262 262
263const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { 263const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
264 .insn_hook = nfp_verify_insn, 264 .insn_hook = nfp_verify_insn,
265}; 265};
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 3801c52098d5..0e564cfabe7e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -47,6 +47,7 @@
47#include <linux/netdevice.h> 47#include <linux/netdevice.h>
48#include <linux/pci.h> 48#include <linux/pci.h>
49#include <linux/io-64-nonatomic-hi-lo.h> 49#include <linux/io-64-nonatomic-hi-lo.h>
50#include <net/xdp.h>
50 51
51#include "nfp_net_ctrl.h" 52#include "nfp_net_ctrl.h"
52 53
@@ -350,6 +351,7 @@ struct nfp_net_rx_buf {
350 * @rxds: Virtual address of FL/RX ring in host memory 351 * @rxds: Virtual address of FL/RX ring in host memory
351 * @dma: DMA address of the FL/RX ring 352 * @dma: DMA address of the FL/RX ring
352 * @size: Size, in bytes, of the FL/RX ring (needed to free) 353 * @size: Size, in bytes, of the FL/RX ring (needed to free)
354 * @xdp_rxq: RX-ring info avail for XDP
353 */ 355 */
354struct nfp_net_rx_ring { 356struct nfp_net_rx_ring {
355 struct nfp_net_r_vector *r_vec; 357 struct nfp_net_r_vector *r_vec;
@@ -361,13 +363,14 @@ struct nfp_net_rx_ring {
361 u32 idx; 363 u32 idx;
362 364
363 int fl_qcidx; 365 int fl_qcidx;
366 unsigned int size;
364 u8 __iomem *qcp_fl; 367 u8 __iomem *qcp_fl;
365 368
366 struct nfp_net_rx_buf *rxbufs; 369 struct nfp_net_rx_buf *rxbufs;
367 struct nfp_net_rx_desc *rxds; 370 struct nfp_net_rx_desc *rxds;
368 371
369 dma_addr_t dma; 372 dma_addr_t dma;
370 unsigned int size; 373 struct xdp_rxq_info xdp_rxq;
371} ____cacheline_aligned; 374} ____cacheline_aligned;
372 375
373/** 376/**
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 29c0947f6d70..05e071b3dc5b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1608,11 +1608,13 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
1608 unsigned int true_bufsz; 1608 unsigned int true_bufsz;
1609 struct sk_buff *skb; 1609 struct sk_buff *skb;
1610 int pkts_polled = 0; 1610 int pkts_polled = 0;
1611 struct xdp_buff xdp;
1611 int idx; 1612 int idx;
1612 1613
1613 rcu_read_lock(); 1614 rcu_read_lock();
1614 xdp_prog = READ_ONCE(dp->xdp_prog); 1615 xdp_prog = READ_ONCE(dp->xdp_prog);
1615 true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; 1616 true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
1617 xdp.rxq = &rx_ring->xdp_rxq;
1616 tx_ring = r_vec->xdp_ring; 1618 tx_ring = r_vec->xdp_ring;
1617 1619
1618 while (pkts_polled < budget) { 1620 while (pkts_polled < budget) {
@@ -1703,7 +1705,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
1703 dp->bpf_offload_xdp) && !meta.portid) { 1705 dp->bpf_offload_xdp) && !meta.portid) {
1704 void *orig_data = rxbuf->frag + pkt_off; 1706 void *orig_data = rxbuf->frag + pkt_off;
1705 unsigned int dma_off; 1707 unsigned int dma_off;
1706 struct xdp_buff xdp;
1707 int act; 1708 int act;
1708 1709
1709 xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; 1710 xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
@@ -2252,6 +2253,7 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
2252 struct nfp_net_r_vector *r_vec = rx_ring->r_vec; 2253 struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
2253 struct nfp_net_dp *dp = &r_vec->nfp_net->dp; 2254 struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
2254 2255
2256 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
2255 kfree(rx_ring->rxbufs); 2257 kfree(rx_ring->rxbufs);
2256 2258
2257 if (rx_ring->rxds) 2259 if (rx_ring->rxds)
@@ -2275,7 +2277,11 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
2275static int 2277static int
2276nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) 2278nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
2277{ 2279{
2278 int sz; 2280 int sz, err;
2281
2282 err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx);
2283 if (err < 0)
2284 return err;
2279 2285
2280 rx_ring->cnt = dp->rxd_cnt; 2286 rx_ring->cnt = dp->rxd_cnt;
2281 rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; 2287 rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 8e01b53765dd..9935978c5542 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -40,6 +40,7 @@
40#include <linux/kernel.h> 40#include <linux/kernel.h>
41#include <linux/mutex.h> 41#include <linux/mutex.h>
42#include <linux/bpf.h> 42#include <linux/bpf.h>
43#include <net/xdp.h>
43#include <linux/qed/qede_rdma.h> 44#include <linux/qed/qede_rdma.h>
44#include <linux/io.h> 45#include <linux/io.h>
45#ifdef CONFIG_RFS_ACCEL 46#ifdef CONFIG_RFS_ACCEL
@@ -345,6 +346,7 @@ struct qede_rx_queue {
345 u64 xdp_no_pass; 346 u64 xdp_no_pass;
346 347
347 void *handle; 348 void *handle;
349 struct xdp_rxq_info xdp_rxq;
348}; 350};
349 351
350union db_prod { 352union db_prod {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 48ec4c56cddf..dafc079ab6b9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1006,6 +1006,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
1006 xdp.data = xdp.data_hard_start + *data_offset; 1006 xdp.data = xdp.data_hard_start + *data_offset;
1007 xdp_set_data_meta_invalid(&xdp); 1007 xdp_set_data_meta_invalid(&xdp);
1008 xdp.data_end = xdp.data + *len; 1008 xdp.data_end = xdp.data + *len;
1009 xdp.rxq = &rxq->xdp_rxq;
1009 1010
1010 /* Queues always have a full reset currently, so for the time 1011 /* Queues always have a full reset currently, so for the time
1011 * being until there's atomic program replace just mark read 1012 * being until there's atomic program replace just mark read
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 9292ca25c40c..2db70eabddfe 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -765,6 +765,12 @@ static void qede_free_fp_array(struct qede_dev *edev)
765 fp = &edev->fp_array[i]; 765 fp = &edev->fp_array[i];
766 766
767 kfree(fp->sb_info); 767 kfree(fp->sb_info);
768 /* Handle mem alloc failure case where qede_init_fp
769 * didn't register xdp_rxq_info yet.
770 * Implicit only (fp->type & QEDE_FASTPATH_RX)
771 */
772 if (fp->rxq && xdp_rxq_info_is_reg(&fp->rxq->xdp_rxq))
773 xdp_rxq_info_unreg(&fp->rxq->xdp_rxq);
768 kfree(fp->rxq); 774 kfree(fp->rxq);
769 kfree(fp->xdp_tx); 775 kfree(fp->xdp_tx);
770 kfree(fp->txq); 776 kfree(fp->txq);
@@ -1493,6 +1499,10 @@ static void qede_init_fp(struct qede_dev *edev)
1493 else 1499 else
1494 fp->rxq->data_direction = DMA_FROM_DEVICE; 1500 fp->rxq->data_direction = DMA_FROM_DEVICE;
1495 fp->rxq->dev = &edev->pdev->dev; 1501 fp->rxq->dev = &edev->pdev->dev;
1502
1503 /* Driver has no error path from here */
1504 WARN_ON(xdp_rxq_info_reg(&fp->rxq->xdp_rxq, edev->ndev,
1505 fp->rxq->rxq_id) < 0);
1496 } 1506 }
1497 1507
1498 if (fp->type & QEDE_FASTPATH_TX) { 1508 if (fp->type & QEDE_FASTPATH_TX) {
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index a243fa7ae02f..5134d5c1306c 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -66,7 +66,7 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
66 return 0; 66 return 0;
67} 67}
68 68
69static const struct bpf_ext_analyzer_ops nsim_bpf_analyzer_ops = { 69static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = {
70 .insn_hook = nsim_bpf_verify_insn, 70 .insn_hook = nsim_bpf_verify_insn,
71}; 71};
72 72
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e367d6310353..e7c5f4b2a9a6 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -180,6 +180,7 @@ struct tun_file {
180 struct list_head next; 180 struct list_head next;
181 struct tun_struct *detached; 181 struct tun_struct *detached;
182 struct skb_array tx_array; 182 struct skb_array tx_array;
183 struct xdp_rxq_info xdp_rxq;
183}; 184};
184 185
185struct tun_flow_entry { 186struct tun_flow_entry {
@@ -687,8 +688,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
687 tun->dev->reg_state == NETREG_REGISTERED) 688 tun->dev->reg_state == NETREG_REGISTERED)
688 unregister_netdevice(tun->dev); 689 unregister_netdevice(tun->dev);
689 } 690 }
690 if (tun) 691 if (tun) {
691 skb_array_cleanup(&tfile->tx_array); 692 skb_array_cleanup(&tfile->tx_array);
693 xdp_rxq_info_unreg(&tfile->xdp_rxq);
694 }
692 sock_put(&tfile->sk); 695 sock_put(&tfile->sk);
693 } 696 }
694} 697}
@@ -728,11 +731,13 @@ static void tun_detach_all(struct net_device *dev)
728 tun_napi_del(tun, tfile); 731 tun_napi_del(tun, tfile);
729 /* Drop read queue */ 732 /* Drop read queue */
730 tun_queue_purge(tfile); 733 tun_queue_purge(tfile);
734 xdp_rxq_info_unreg(&tfile->xdp_rxq);
731 sock_put(&tfile->sk); 735 sock_put(&tfile->sk);
732 } 736 }
733 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { 737 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
734 tun_enable_queue(tfile); 738 tun_enable_queue(tfile);
735 tun_queue_purge(tfile); 739 tun_queue_purge(tfile);
740 xdp_rxq_info_unreg(&tfile->xdp_rxq);
736 sock_put(&tfile->sk); 741 sock_put(&tfile->sk);
737 } 742 }
738 BUG_ON(tun->numdisabled != 0); 743 BUG_ON(tun->numdisabled != 0);
@@ -784,6 +789,22 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
784 789
785 tfile->queue_index = tun->numqueues; 790 tfile->queue_index = tun->numqueues;
786 tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; 791 tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
792
793 if (tfile->detached) {
794 /* Re-attach detached tfile, updating XDP queue_index */
795 WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq));
796
797 if (tfile->xdp_rxq.queue_index != tfile->queue_index)
798 tfile->xdp_rxq.queue_index = tfile->queue_index;
799 } else {
800 /* Setup XDP RX-queue info, for new tfile getting attached */
801 err = xdp_rxq_info_reg(&tfile->xdp_rxq,
802 tun->dev, tfile->queue_index);
803 if (err < 0)
804 goto out;
805 err = 0;
806 }
807
787 rcu_assign_pointer(tfile->tun, tun); 808 rcu_assign_pointer(tfile->tun, tun);
788 rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); 809 rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
789 tun->numqueues++; 810 tun->numqueues++;
@@ -1508,6 +1529,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
1508 xdp.data = buf + pad; 1529 xdp.data = buf + pad;
1509 xdp_set_data_meta_invalid(&xdp); 1530 xdp_set_data_meta_invalid(&xdp);
1510 xdp.data_end = xdp.data + len; 1531 xdp.data_end = xdp.data + len;
1532 xdp.rxq = &tfile->xdp_rxq;
1511 orig_data = xdp.data; 1533 orig_data = xdp.data;
1512 act = bpf_prog_run_xdp(xdp_prog, &xdp); 1534 act = bpf_prog_run_xdp(xdp_prog, &xdp);
1513 1535
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6fb7b658a6cc..ed8299343728 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -31,6 +31,7 @@
31#include <linux/average.h> 31#include <linux/average.h>
32#include <linux/filter.h> 32#include <linux/filter.h>
33#include <net/route.h> 33#include <net/route.h>
34#include <net/xdp.h>
34 35
35static int napi_weight = NAPI_POLL_WEIGHT; 36static int napi_weight = NAPI_POLL_WEIGHT;
36module_param(napi_weight, int, 0444); 37module_param(napi_weight, int, 0444);
@@ -115,6 +116,8 @@ struct receive_queue {
115 116
116 /* Name of this receive queue: input.$index */ 117 /* Name of this receive queue: input.$index */
117 char name[40]; 118 char name[40];
119
120 struct xdp_rxq_info xdp_rxq;
118}; 121};
119 122
120struct virtnet_info { 123struct virtnet_info {
@@ -559,6 +562,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
559 xdp.data = xdp.data_hard_start + xdp_headroom; 562 xdp.data = xdp.data_hard_start + xdp_headroom;
560 xdp_set_data_meta_invalid(&xdp); 563 xdp_set_data_meta_invalid(&xdp);
561 xdp.data_end = xdp.data + len; 564 xdp.data_end = xdp.data + len;
565 xdp.rxq = &rq->xdp_rxq;
562 orig_data = xdp.data; 566 orig_data = xdp.data;
563 act = bpf_prog_run_xdp(xdp_prog, &xdp); 567 act = bpf_prog_run_xdp(xdp_prog, &xdp);
564 568
@@ -692,6 +696,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
692 xdp.data = data + vi->hdr_len; 696 xdp.data = data + vi->hdr_len;
693 xdp_set_data_meta_invalid(&xdp); 697 xdp_set_data_meta_invalid(&xdp);
694 xdp.data_end = xdp.data + (len - vi->hdr_len); 698 xdp.data_end = xdp.data + (len - vi->hdr_len);
699 xdp.rxq = &rq->xdp_rxq;
700
695 act = bpf_prog_run_xdp(xdp_prog, &xdp); 701 act = bpf_prog_run_xdp(xdp_prog, &xdp);
696 702
697 if (act != XDP_PASS) 703 if (act != XDP_PASS)
@@ -1225,13 +1231,18 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
1225static int virtnet_open(struct net_device *dev) 1231static int virtnet_open(struct net_device *dev)
1226{ 1232{
1227 struct virtnet_info *vi = netdev_priv(dev); 1233 struct virtnet_info *vi = netdev_priv(dev);
1228 int i; 1234 int i, err;
1229 1235
1230 for (i = 0; i < vi->max_queue_pairs; i++) { 1236 for (i = 0; i < vi->max_queue_pairs; i++) {
1231 if (i < vi->curr_queue_pairs) 1237 if (i < vi->curr_queue_pairs)
1232 /* Make sure we have some buffers: if oom use wq. */ 1238 /* Make sure we have some buffers: if oom use wq. */
1233 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL)) 1239 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1234 schedule_delayed_work(&vi->refill, 0); 1240 schedule_delayed_work(&vi->refill, 0);
1241
1242 err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i);
1243 if (err < 0)
1244 return err;
1245
1235 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); 1246 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
1236 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi); 1247 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
1237 } 1248 }
@@ -1560,6 +1571,7 @@ static int virtnet_close(struct net_device *dev)
1560 cancel_delayed_work_sync(&vi->refill); 1571 cancel_delayed_work_sync(&vi->refill);
1561 1572
1562 for (i = 0; i < vi->max_queue_pairs; i++) { 1573 for (i = 0; i < vi->max_queue_pairs; i++) {
1574 xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
1563 napi_disable(&vi->rq[i].napi); 1575 napi_disable(&vi->rq[i].napi);
1564 virtnet_napi_tx_disable(&vi->sq[i].napi); 1576 virtnet_napi_tx_disable(&vi->sq[i].napi);
1565 } 1577 }
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 7c6f76d29f56..36b0772701a0 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -103,14 +103,14 @@ slow:
103 goto got_it; 103 goto got_it;
104} 104}
105 105
106void *ns_get_path(struct path *path, struct task_struct *task, 106void *ns_get_path_cb(struct path *path, ns_get_path_helper_t *ns_get_cb,
107 const struct proc_ns_operations *ns_ops) 107 void *private_data)
108{ 108{
109 struct ns_common *ns; 109 struct ns_common *ns;
110 void *ret; 110 void *ret;
111 111
112again: 112again:
113 ns = ns_ops->get(task); 113 ns = ns_get_cb(private_data);
114 if (!ns) 114 if (!ns)
115 return ERR_PTR(-ENOENT); 115 return ERR_PTR(-ENOENT);
116 116
@@ -120,6 +120,29 @@ again:
120 return ret; 120 return ret;
121} 121}
122 122
123struct ns_get_path_task_args {
124 const struct proc_ns_operations *ns_ops;
125 struct task_struct *task;
126};
127
128static struct ns_common *ns_get_path_task(void *private_data)
129{
130 struct ns_get_path_task_args *args = private_data;
131
132 return args->ns_ops->get(args->task);
133}
134
135void *ns_get_path(struct path *path, struct task_struct *task,
136 const struct proc_ns_operations *ns_ops)
137{
138 struct ns_get_path_task_args args = {
139 .ns_ops = ns_ops,
140 .task = task,
141 };
142
143 return ns_get_path_cb(path, ns_get_path_task, &args);
144}
145
123int open_related_ns(struct ns_common *ns, 146int open_related_ns(struct ns_common *ns,
124 struct ns_common *(*get_ns)(struct ns_common *ns)) 147 struct ns_common *(*get_ns)(struct ns_common *ns))
125{ 148{
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index da54ef644fcd..9e03046d1df2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -17,6 +17,7 @@
17#include <linux/numa.h> 17#include <linux/numa.h>
18#include <linux/wait.h> 18#include <linux/wait.h>
19 19
20struct bpf_verifier_env;
20struct perf_event; 21struct perf_event;
21struct bpf_prog; 22struct bpf_prog;
22struct bpf_map; 23struct bpf_map;
@@ -184,14 +185,18 @@ struct bpf_verifier_ops {
184 struct bpf_prog *prog, u32 *target_size); 185 struct bpf_prog *prog, u32 *target_size);
185}; 186};
186 187
188struct bpf_prog_offload_ops {
189 int (*insn_hook)(struct bpf_verifier_env *env,
190 int insn_idx, int prev_insn_idx);
191};
192
187struct bpf_dev_offload { 193struct bpf_dev_offload {
188 struct bpf_prog *prog; 194 struct bpf_prog *prog;
189 struct net_device *netdev; 195 struct net_device *netdev;
190 void *dev_priv; 196 void *dev_priv;
191 struct list_head offloads; 197 struct list_head offloads;
192 bool dev_state; 198 bool dev_state;
193 bool verifier_running; 199 const struct bpf_prog_offload_ops *dev_ops;
194 wait_queue_head_t verifier_done;
195}; 200};
196 201
197struct bpf_prog_aux { 202struct bpf_prog_aux {
@@ -201,6 +206,7 @@ struct bpf_prog_aux {
201 u32 stack_depth; 206 u32 stack_depth;
202 u32 id; 207 u32 id;
203 u32 func_cnt; 208 u32 func_cnt;
209 bool offload_requested;
204 struct bpf_prog **func; 210 struct bpf_prog **func;
205 void *jit_data; /* JIT specific data. arch dependent */ 211 void *jit_data; /* JIT specific data. arch dependent */
206 struct latch_tree_node ksym_tnode; 212 struct latch_tree_node ksym_tnode;
@@ -351,6 +357,8 @@ void bpf_prog_put(struct bpf_prog *prog);
351int __bpf_prog_charge(struct user_struct *user, u32 pages); 357int __bpf_prog_charge(struct user_struct *user, u32 pages);
352void __bpf_prog_uncharge(struct user_struct *user, u32 pages); 358void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
353 359
360void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
361
354struct bpf_map *bpf_map_get_with_uref(u32 ufd); 362struct bpf_map *bpf_map_get_with_uref(u32 ufd);
355struct bpf_map *__bpf_map_get(struct fd f); 363struct bpf_map *__bpf_map_get(struct fd f);
356struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); 364struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
@@ -523,13 +531,15 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
523 531
524int bpf_prog_offload_compile(struct bpf_prog *prog); 532int bpf_prog_offload_compile(struct bpf_prog *prog);
525void bpf_prog_offload_destroy(struct bpf_prog *prog); 533void bpf_prog_offload_destroy(struct bpf_prog *prog);
534int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
535 struct bpf_prog *prog);
526 536
527#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) 537#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
528int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); 538int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
529 539
530static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) 540static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
531{ 541{
532 return aux->offload; 542 return aux->offload_requested;
533} 543}
534#else 544#else
535static inline int bpf_prog_offload_init(struct bpf_prog *prog, 545static inline int bpf_prog_offload_init(struct bpf_prog *prog,
@@ -544,7 +554,7 @@ static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
544} 554}
545#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ 555#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
546 556
547#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) 557#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
548struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); 558struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
549int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); 559int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
550#else 560#else
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 978c1d9c9383..19b8349a3809 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -42,7 +42,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
42BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) 42BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
43#ifdef CONFIG_NET 43#ifdef CONFIG_NET
44BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) 44BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
45#ifdef CONFIG_STREAM_PARSER 45#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
46BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) 46BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
47#endif 47#endif
48BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) 48BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 883a35d50cd5..2feb218c001d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -166,12 +166,6 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log)
166 return log->len_used >= log->len_total - 1; 166 return log->len_used >= log->len_total - 1;
167} 167}
168 168
169struct bpf_verifier_env;
170struct bpf_ext_analyzer_ops {
171 int (*insn_hook)(struct bpf_verifier_env *env,
172 int insn_idx, int prev_insn_idx);
173};
174
175#define BPF_MAX_SUBPROGS 256 169#define BPF_MAX_SUBPROGS 256
176 170
177/* single container for all structs 171/* single container for all structs
@@ -185,7 +179,6 @@ struct bpf_verifier_env {
185 bool strict_alignment; /* perform strict pointer alignment checks */ 179 bool strict_alignment; /* perform strict pointer alignment checks */
186 struct bpf_verifier_state *cur_state; /* current verifier state */ 180 struct bpf_verifier_state *cur_state; /* current verifier state */
187 struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ 181 struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
188 const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */
189 struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ 182 struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
190 u32 used_map_cnt; /* number of used maps */ 183 u32 used_map_cnt; /* number of used maps */
191 u32 id_gen; /* used to generate unique reg IDs */ 184 u32 id_gen; /* used to generate unique reg IDs */
@@ -206,13 +199,8 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
206 return cur->frame[cur->curframe]->regs; 199 return cur->frame[cur->curframe]->regs;
207} 200}
208 201
209#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
210int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); 202int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
211#else 203int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
212static inline int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) 204 int insn_idx, int prev_insn_idx);
213{
214 return -EOPNOTSUPP;
215}
216#endif
217 205
218#endif /* _LINUX_BPF_VERIFIER_H */ 206#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2b0df2703671..425056c7f96c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -20,6 +20,7 @@
20#include <linux/set_memory.h> 20#include <linux/set_memory.h>
21#include <linux/kallsyms.h> 21#include <linux/kallsyms.h>
22 22
23#include <net/xdp.h>
23#include <net/sch_generic.h> 24#include <net/sch_generic.h>
24 25
25#include <uapi/linux/filter.h> 26#include <uapi/linux/filter.h>
@@ -503,6 +504,7 @@ struct xdp_buff {
503 void *data_end; 504 void *data_end;
504 void *data_meta; 505 void *data_meta;
505 void *data_hard_start; 506 void *data_hard_start;
507 struct xdp_rxq_info *rxq;
506}; 508};
507 509
508/* Compute the linear packet data range [data, data_end) which 510/* Compute the linear packet data range [data, data_end) which
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 352066e4eeef..440b000f07f4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -44,6 +44,7 @@
44#include <net/dcbnl.h> 44#include <net/dcbnl.h>
45#endif 45#endif
46#include <net/netprio_cgroup.h> 46#include <net/netprio_cgroup.h>
47#include <net/xdp.h>
47 48
48#include <linux/netdev_features.h> 49#include <linux/netdev_features.h>
49#include <linux/neighbour.h> 50#include <linux/neighbour.h>
@@ -686,6 +687,7 @@ struct netdev_rx_queue {
686#endif 687#endif
687 struct kobject kobj; 688 struct kobject kobj;
688 struct net_device *dev; 689 struct net_device *dev;
690 struct xdp_rxq_info xdp_rxq;
689} ____cacheline_aligned_in_smp; 691} ____cacheline_aligned_in_smp;
690 692
691/* 693/*
@@ -804,7 +806,7 @@ enum bpf_netdev_command {
804 BPF_OFFLOAD_DESTROY, 806 BPF_OFFLOAD_DESTROY,
805}; 807};
806 808
807struct bpf_ext_analyzer_ops; 809struct bpf_prog_offload_ops;
808struct netlink_ext_ack; 810struct netlink_ext_ack;
809 811
810struct netdev_bpf { 812struct netdev_bpf {
@@ -826,7 +828,7 @@ struct netdev_bpf {
826 /* BPF_OFFLOAD_VERIFIER_PREP */ 828 /* BPF_OFFLOAD_VERIFIER_PREP */
827 struct { 829 struct {
828 struct bpf_prog *prog; 830 struct bpf_prog *prog;
829 const struct bpf_ext_analyzer_ops *ops; /* callee set */ 831 const struct bpf_prog_offload_ops *ops; /* callee set */
830 } verifier; 832 } verifier;
831 /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ 833 /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */
832 struct { 834 struct {
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 2ff18c9840a7..d31cb6215905 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -78,6 +78,9 @@ extern struct file *proc_ns_fget(int fd);
78#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) 78#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
79extern void *ns_get_path(struct path *path, struct task_struct *task, 79extern void *ns_get_path(struct path *path, struct task_struct *task,
80 const struct proc_ns_operations *ns_ops); 80 const struct proc_ns_operations *ns_ops);
81typedef struct ns_common *ns_get_path_helper_t(void *);
82extern void *ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
83 void *private_data);
81 84
82extern int ns_get_name(char *buf, size_t size, struct task_struct *task, 85extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
83 const struct proc_ns_operations *ns_ops); 86 const struct proc_ns_operations *ns_ops);
diff --git a/include/net/xdp.h b/include/net/xdp.h
new file mode 100644
index 000000000000..b2362ddfa694
--- /dev/null
+++ b/include/net/xdp.h
@@ -0,0 +1,48 @@
1/* include/net/xdp.h
2 *
3 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
4 * Released under terms in GPL version 2. See COPYING.
5 */
6#ifndef __LINUX_NET_XDP_H__
7#define __LINUX_NET_XDP_H__
8
9/**
10 * DOC: XDP RX-queue information
11 *
12 * The XDP RX-queue info (xdp_rxq_info) is associated with the driver
13 * level RX-ring queues. It is information that is specific to how
14 * the driver has configured a given RX-ring queue.
15 *
16 * Each xdp_buff frame received in the driver carries a (pointer)
17 * reference to this xdp_rxq_info structure. This provides the XDP
18 * data-path read-access to RX-info for both kernel and bpf-side
19 * (limited subset).
20 *
21 * For now, direct access is only safe while running in NAPI/softirq
22 * context. Contents are read-mostly and must not be updated during
23 * driver NAPI/softirq poll.
24 *
25 * The driver usage API is a register and unregister API.
26 *
27 * The struct is not directly tied to the XDP prog. A new XDP prog
28 * can be attached as long as it doesn't change the underlying
29 * RX-ring. If the RX-ring does change significantly, the NIC driver
30 * naturally needs to stop the RX-ring before purging and reallocating
31 * memory. In that process the driver MUST call unregister (which
32 * also applies for driver shutdown and unload). The register API is
33 * also mandatory during RX-ring setup.
34 */
35
36struct xdp_rxq_info {
37 struct net_device *dev;
38 u32 queue_index;
39 u32 reg_state;
40} ____cacheline_aligned; /* perf critical, avoid false-sharing */
41
42int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
43 struct net_device *dev, u32 queue_index);
44void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq);
45void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
46bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
47
48#endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 69eabfcb9bdb..405317f9c064 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -899,6 +899,9 @@ struct xdp_md {
899 __u32 data; 899 __u32 data;
900 __u32 data_end; 900 __u32 data_end;
901 __u32 data_meta; 901 __u32 data_meta;
902 /* Below accesses go through struct xdp_rxq_info */
903 __u32 ingress_ifindex; /* rxq->dev->ifindex */
904 __u32 rx_queue_index; /* rxq->queue_index */
902}; 905};
903 906
904enum sk_action { 907enum sk_action {
@@ -921,6 +924,9 @@ struct bpf_prog_info {
921 __u32 nr_map_ids; 924 __u32 nr_map_ids;
922 __aligned_u64 map_ids; 925 __aligned_u64 map_ids;
923 char name[BPF_OBJ_NAME_LEN]; 926 char name[BPF_OBJ_NAME_LEN];
927 __u32 ifindex;
928 __u64 netns_dev;
929 __u64 netns_ino;
924} __attribute__((aligned(8))); 930} __attribute__((aligned(8)));
925 931
926struct bpf_map_info { 932struct bpf_map_info {
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e691da0b3bab..a713fd23ec88 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -9,9 +9,11 @@ obj-$(CONFIG_BPF_SYSCALL) += devmap.o
9obj-$(CONFIG_BPF_SYSCALL) += cpumap.o 9obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
10obj-$(CONFIG_BPF_SYSCALL) += offload.o 10obj-$(CONFIG_BPF_SYSCALL) += offload.o
11ifeq ($(CONFIG_STREAM_PARSER),y) 11ifeq ($(CONFIG_STREAM_PARSER),y)
12ifeq ($(CONFIG_INET),y)
12obj-$(CONFIG_BPF_SYSCALL) += sockmap.o 13obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
13endif 14endif
14endif 15endif
16endif
15ifeq ($(CONFIG_PERF_EVENTS),y) 17ifeq ($(CONFIG_PERF_EVENTS),y)
16obj-$(CONFIG_BPF_SYSCALL) += stackmap.o 18obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
17endif 19endif
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8455b89d1bbf..040d4e0edf3f 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -16,17 +16,22 @@
16#include <linux/bpf.h> 16#include <linux/bpf.h>
17#include <linux/bpf_verifier.h> 17#include <linux/bpf_verifier.h>
18#include <linux/bug.h> 18#include <linux/bug.h>
19#include <linux/kdev_t.h>
19#include <linux/list.h> 20#include <linux/list.h>
20#include <linux/netdevice.h> 21#include <linux/netdevice.h>
21#include <linux/printk.h> 22#include <linux/printk.h>
23#include <linux/proc_ns.h>
22#include <linux/rtnetlink.h> 24#include <linux/rtnetlink.h>
25#include <linux/rwsem.h>
23 26
24/* protected by RTNL */ 27/* Protects bpf_prog_offload_devs and offload members of all progs.
28 * RTNL lock cannot be taken when holding this lock.
29 */
30static DECLARE_RWSEM(bpf_devs_lock);
25static LIST_HEAD(bpf_prog_offload_devs); 31static LIST_HEAD(bpf_prog_offload_devs);
26 32
27int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) 33int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
28{ 34{
29 struct net *net = current->nsproxy->net_ns;
30 struct bpf_dev_offload *offload; 35 struct bpf_dev_offload *offload;
31 36
32 if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS && 37 if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
@@ -41,32 +46,40 @@ int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
41 return -ENOMEM; 46 return -ENOMEM;
42 47
43 offload->prog = prog; 48 offload->prog = prog;
44 init_waitqueue_head(&offload->verifier_done);
45 49
46 rtnl_lock(); 50 offload->netdev = dev_get_by_index(current->nsproxy->net_ns,
47 offload->netdev = __dev_get_by_index(net, attr->prog_ifindex); 51 attr->prog_ifindex);
48 if (!offload->netdev) { 52 if (!offload->netdev)
49 rtnl_unlock(); 53 goto err_free;
50 kfree(offload);
51 return -EINVAL;
52 }
53 54
55 down_write(&bpf_devs_lock);
56 if (offload->netdev->reg_state != NETREG_REGISTERED)
57 goto err_unlock;
54 prog->aux->offload = offload; 58 prog->aux->offload = offload;
55 list_add_tail(&offload->offloads, &bpf_prog_offload_devs); 59 list_add_tail(&offload->offloads, &bpf_prog_offload_devs);
56 rtnl_unlock(); 60 dev_put(offload->netdev);
61 up_write(&bpf_devs_lock);
57 62
58 return 0; 63 return 0;
64err_unlock:
65 up_write(&bpf_devs_lock);
66 dev_put(offload->netdev);
67err_free:
68 kfree(offload);
69 return -EINVAL;
59} 70}
60 71
61static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd, 72static int __bpf_offload_ndo(struct bpf_prog *prog, enum bpf_netdev_command cmd,
62 struct netdev_bpf *data) 73 struct netdev_bpf *data)
63{ 74{
64 struct net_device *netdev = prog->aux->offload->netdev; 75 struct bpf_dev_offload *offload = prog->aux->offload;
76 struct net_device *netdev;
65 77
66 ASSERT_RTNL(); 78 ASSERT_RTNL();
67 79
68 if (!netdev) 80 if (!offload)
69 return -ENODEV; 81 return -ENODEV;
82 netdev = offload->netdev;
70 if (!netdev->netdev_ops->ndo_bpf) 83 if (!netdev->netdev_ops->ndo_bpf)
71 return -EOPNOTSUPP; 84 return -EOPNOTSUPP;
72 85
@@ -87,62 +100,63 @@ int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env)
87 if (err) 100 if (err)
88 goto exit_unlock; 101 goto exit_unlock;
89 102
90 env->dev_ops = data.verifier.ops; 103 env->prog->aux->offload->dev_ops = data.verifier.ops;
91
92 env->prog->aux->offload->dev_state = true; 104 env->prog->aux->offload->dev_state = true;
93 env->prog->aux->offload->verifier_running = true;
94exit_unlock: 105exit_unlock:
95 rtnl_unlock(); 106 rtnl_unlock();
96 return err; 107 return err;
97} 108}
98 109
110int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
111 int insn_idx, int prev_insn_idx)
112{
113 struct bpf_dev_offload *offload;
114 int ret = -ENODEV;
115
116 down_read(&bpf_devs_lock);
117 offload = env->prog->aux->offload;
118 if (offload)
119 ret = offload->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
120 up_read(&bpf_devs_lock);
121
122 return ret;
123}
124
99static void __bpf_prog_offload_destroy(struct bpf_prog *prog) 125static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
100{ 126{
101 struct bpf_dev_offload *offload = prog->aux->offload; 127 struct bpf_dev_offload *offload = prog->aux->offload;
102 struct netdev_bpf data = {}; 128 struct netdev_bpf data = {};
103 129
104 /* Caution - if netdev is destroyed before the program, this function
105 * will be called twice.
106 */
107
108 data.offload.prog = prog; 130 data.offload.prog = prog;
109 131
110 if (offload->verifier_running)
111 wait_event(offload->verifier_done, !offload->verifier_running);
112
113 if (offload->dev_state) 132 if (offload->dev_state)
114 WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data)); 133 WARN_ON(__bpf_offload_ndo(prog, BPF_OFFLOAD_DESTROY, &data));
115 134
116 offload->dev_state = false; 135 /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */
136 bpf_prog_free_id(prog, true);
137
117 list_del_init(&offload->offloads); 138 list_del_init(&offload->offloads);
118 offload->netdev = NULL; 139 kfree(offload);
140 prog->aux->offload = NULL;
119} 141}
120 142
121void bpf_prog_offload_destroy(struct bpf_prog *prog) 143void bpf_prog_offload_destroy(struct bpf_prog *prog)
122{ 144{
123 struct bpf_dev_offload *offload = prog->aux->offload;
124
125 offload->verifier_running = false;
126 wake_up(&offload->verifier_done);
127
128 rtnl_lock(); 145 rtnl_lock();
129 __bpf_prog_offload_destroy(prog); 146 down_write(&bpf_devs_lock);
147 if (prog->aux->offload)
148 __bpf_prog_offload_destroy(prog);
149 up_write(&bpf_devs_lock);
130 rtnl_unlock(); 150 rtnl_unlock();
131
132 kfree(offload);
133} 151}
134 152
135static int bpf_prog_offload_translate(struct bpf_prog *prog) 153static int bpf_prog_offload_translate(struct bpf_prog *prog)
136{ 154{
137 struct bpf_dev_offload *offload = prog->aux->offload;
138 struct netdev_bpf data = {}; 155 struct netdev_bpf data = {};
139 int ret; 156 int ret;
140 157
141 data.offload.prog = prog; 158 data.offload.prog = prog;
142 159
143 offload->verifier_running = false;
144 wake_up(&offload->verifier_done);
145
146 rtnl_lock(); 160 rtnl_lock();
147 ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data); 161 ret = __bpf_offload_ndo(prog, BPF_OFFLOAD_TRANSLATE, &data);
148 rtnl_unlock(); 162 rtnl_unlock();
@@ -164,6 +178,63 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
164 return bpf_prog_offload_translate(prog); 178 return bpf_prog_offload_translate(prog);
165} 179}
166 180
181struct ns_get_path_bpf_prog_args {
182 struct bpf_prog *prog;
183 struct bpf_prog_info *info;
184};
185
186static struct ns_common *bpf_prog_offload_info_fill_ns(void *private_data)
187{
188 struct ns_get_path_bpf_prog_args *args = private_data;
189 struct bpf_prog_aux *aux = args->prog->aux;
190 struct ns_common *ns;
191 struct net *net;
192
193 rtnl_lock();
194 down_read(&bpf_devs_lock);
195
196 if (aux->offload) {
197 args->info->ifindex = aux->offload->netdev->ifindex;
198 net = dev_net(aux->offload->netdev);
199 get_net(net);
200 ns = &net->ns;
201 } else {
202 args->info->ifindex = 0;
203 ns = NULL;
204 }
205
206 up_read(&bpf_devs_lock);
207 rtnl_unlock();
208
209 return ns;
210}
211
212int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
213 struct bpf_prog *prog)
214{
215 struct ns_get_path_bpf_prog_args args = {
216 .prog = prog,
217 .info = info,
218 };
219 struct inode *ns_inode;
220 struct path ns_path;
221 void *res;
222
223 res = ns_get_path_cb(&ns_path, bpf_prog_offload_info_fill_ns, &args);
224 if (IS_ERR(res)) {
225 if (!info->ifindex)
226 return -ENODEV;
227 return PTR_ERR(res);
228 }
229
230 ns_inode = ns_path.dentry->d_inode;
231 info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
232 info->netns_ino = ns_inode->i_ino;
233 path_put(&ns_path);
234
235 return 0;
236}
237
167const struct bpf_prog_ops bpf_offload_prog_ops = { 238const struct bpf_prog_ops bpf_offload_prog_ops = {
168}; 239};
169 240
@@ -181,11 +252,13 @@ static int bpf_offload_notification(struct notifier_block *notifier,
181 if (netdev->reg_state != NETREG_UNREGISTERING) 252 if (netdev->reg_state != NETREG_UNREGISTERING)
182 break; 253 break;
183 254
255 down_write(&bpf_devs_lock);
184 list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, 256 list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
185 offloads) { 257 offloads) {
186 if (offload->netdev == netdev) 258 if (offload->netdev == netdev)
187 __bpf_prog_offload_destroy(offload->prog); 259 __bpf_prog_offload_destroy(offload->prog);
188 } 260 }
261 up_write(&bpf_devs_lock);
189 break; 262 break;
190 default: 263 default:
191 break; 264 break;
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 5ee2e41893d9..3f662ee23a34 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -96,14 +96,6 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
96 return rcu_dereference_sk_user_data(sk); 96 return rcu_dereference_sk_user_data(sk);
97} 97}
98 98
99/* compute the linear packet data range [data, data_end) for skb when
100 * sk_skb type programs are in use.
101 */
102static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
103{
104 TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
105}
106
107enum __sk_action { 99enum __sk_action {
108 __SK_DROP = 0, 100 __SK_DROP = 0,
109 __SK_PASS, 101 __SK_PASS,
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index a15bc636cc98..6c63c2222ea8 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -226,9 +226,33 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
226 return 0; 226 return 0;
227} 227}
228 228
229static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 229static int stack_map_get_next_key(struct bpf_map *map, void *key,
230 void *next_key)
230{ 231{
231 return -EINVAL; 232 struct bpf_stack_map *smap = container_of(map,
233 struct bpf_stack_map, map);
234 u32 id;
235
236 WARN_ON_ONCE(!rcu_read_lock_held());
237
238 if (!key) {
239 id = 0;
240 } else {
241 id = *(u32 *)key;
242 if (id >= smap->n_buckets || !smap->buckets[id])
243 id = 0;
244 else
245 id++;
246 }
247
248 while (id < smap->n_buckets && !smap->buckets[id])
249 id++;
250
251 if (id >= smap->n_buckets)
252 return -ENOENT;
253
254 *(u32 *)next_key = id;
255 return 0;
232} 256}
233 257
234static int stack_map_update_elem(struct bpf_map *map, void *key, void *value, 258static int stack_map_update_elem(struct bpf_map *map, void *key, void *value,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 007802c5ca7d..ebf0fb23e237 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -905,9 +905,13 @@ static int bpf_prog_alloc_id(struct bpf_prog *prog)
905 return id > 0 ? 0 : id; 905 return id > 0 ? 0 : id;
906} 906}
907 907
908static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock) 908void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
909{ 909{
910 /* cBPF to eBPF migrations are currently not in the idr store. */ 910 /* cBPF to eBPF migrations are currently not in the idr store.
911 * Offloaded programs are removed from the store when their device
912 * disappears - even if someone grabs an fd to them they are unusable,
913 * simply waiting for refcnt to drop to be freed.
914 */
911 if (!prog->aux->id) 915 if (!prog->aux->id)
912 return; 916 return;
913 917
@@ -917,6 +921,7 @@ static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
917 __acquire(&prog_idr_lock); 921 __acquire(&prog_idr_lock);
918 922
919 idr_remove(&prog_idr, prog->aux->id); 923 idr_remove(&prog_idr, prog->aux->id);
924 prog->aux->id = 0;
920 925
921 if (do_idr_lock) 926 if (do_idr_lock)
922 spin_unlock_bh(&prog_idr_lock); 927 spin_unlock_bh(&prog_idr_lock);
@@ -1157,6 +1162,8 @@ static int bpf_prog_load(union bpf_attr *attr)
1157 if (!prog) 1162 if (!prog)
1158 return -ENOMEM; 1163 return -ENOMEM;
1159 1164
1165 prog->aux->offload_requested = !!attr->prog_ifindex;
1166
1160 err = security_bpf_prog_alloc(prog->aux); 1167 err = security_bpf_prog_alloc(prog->aux);
1161 if (err) 1168 if (err)
1162 goto free_prog_nouncharge; 1169 goto free_prog_nouncharge;
@@ -1178,7 +1185,7 @@ static int bpf_prog_load(union bpf_attr *attr)
1178 atomic_set(&prog->aux->refcnt, 1); 1185 atomic_set(&prog->aux->refcnt, 1);
1179 prog->gpl_compatible = is_gpl ? 1 : 0; 1186 prog->gpl_compatible = is_gpl ? 1 : 0;
1180 1187
1181 if (attr->prog_ifindex) { 1188 if (bpf_prog_is_dev_bound(prog->aux)) {
1182 err = bpf_prog_offload_init(prog, attr); 1189 err = bpf_prog_offload_init(prog, attr);
1183 if (err) 1190 if (err)
1184 goto free_prog; 1191 goto free_prog;
@@ -1700,6 +1707,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
1700 return -EFAULT; 1707 return -EFAULT;
1701 } 1708 }
1702 1709
1710 if (bpf_prog_is_dev_bound(prog->aux)) {
1711 err = bpf_prog_offload_info_fill(&info, prog);
1712 if (err)
1713 return err;
1714 }
1715
1703done: 1716done:
1704 if (copy_to_user(uinfo, &info, info_len) || 1717 if (copy_to_user(uinfo, &info, info_len) ||
1705 put_user(info_len, &uattr->info.info_len)) 1718 put_user(info_len, &uattr->info.info_len))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 98d8637cf70d..a2b211262c25 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4438,15 +4438,6 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
4438 return 0; 4438 return 0;
4439} 4439}
4440 4440
4441static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
4442 int insn_idx, int prev_insn_idx)
4443{
4444 if (env->dev_ops && env->dev_ops->insn_hook)
4445 return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
4446
4447 return 0;
4448}
4449
4450static int do_check(struct bpf_verifier_env *env) 4441static int do_check(struct bpf_verifier_env *env)
4451{ 4442{
4452 struct bpf_verifier_state *state; 4443 struct bpf_verifier_state *state;
@@ -4531,9 +4522,12 @@ static int do_check(struct bpf_verifier_env *env)
4531 print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks); 4522 print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
4532 } 4523 }
4533 4524
4534 err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); 4525 if (bpf_prog_is_dev_bound(env->prog->aux)) {
4535 if (err) 4526 err = bpf_prog_offload_verify_insn(env, insn_idx,
4536 return err; 4527 prev_insn_idx);
4528 if (err)
4529 return err;
4530 }
4537 4531
4538 regs = cur_regs(env); 4532 regs = cur_regs(env);
4539 env->insn_aux_data[insn_idx].seen = true; 4533 env->insn_aux_data[insn_idx].seen = true;
@@ -5463,7 +5457,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
5463 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) 5457 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
5464 env->strict_alignment = true; 5458 env->strict_alignment = true;
5465 5459
5466 if (env->prog->aux->offload) { 5460 if (bpf_prog_is_dev_bound(env->prog->aux)) {
5467 ret = bpf_prog_offload_verifier_prep(env); 5461 ret = bpf_prog_offload_verifier_prep(env);
5468 if (ret) 5462 if (ret)
5469 goto err_unlock; 5463 goto err_unlock;
diff --git a/net/core/Makefile b/net/core/Makefile
index 1fd0a9c88b1b..6dbbba8c57ae 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
11obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ 11obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
12 neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ 12 neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
13 sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ 13 sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
14 fib_notifier.o 14 fib_notifier.o xdp.o
15 15
16obj-y += net-sysfs.o 16obj-y += net-sysfs.o
17obj-$(CONFIG_PROC_FS) += net-procfs.o 17obj-$(CONFIG_PROC_FS) += net-procfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 2eb66c0d9cdb..d7925ef8743d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3906,9 +3906,33 @@ drop:
3906 return NET_RX_DROP; 3906 return NET_RX_DROP;
3907} 3907}
3908 3908
3909static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
3910{
3911 struct net_device *dev = skb->dev;
3912 struct netdev_rx_queue *rxqueue;
3913
3914 rxqueue = dev->_rx;
3915
3916 if (skb_rx_queue_recorded(skb)) {
3917 u16 index = skb_get_rx_queue(skb);
3918
3919 if (unlikely(index >= dev->real_num_rx_queues)) {
3920 WARN_ONCE(dev->real_num_rx_queues > 1,
3921 "%s received packet on queue %u, but number "
3922 "of RX queues is %u\n",
3923 dev->name, index, dev->real_num_rx_queues);
3924
3925 return rxqueue; /* Return first rxqueue */
3926 }
3927 rxqueue += index;
3928 }
3929 return rxqueue;
3930}
3931
3909static u32 netif_receive_generic_xdp(struct sk_buff *skb, 3932static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3910 struct bpf_prog *xdp_prog) 3933 struct bpf_prog *xdp_prog)
3911{ 3934{
3935 struct netdev_rx_queue *rxqueue;
3912 u32 metalen, act = XDP_DROP; 3936 u32 metalen, act = XDP_DROP;
3913 struct xdp_buff xdp; 3937 struct xdp_buff xdp;
3914 void *orig_data; 3938 void *orig_data;
@@ -3952,6 +3976,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
3952 xdp.data_hard_start = skb->data - skb_headroom(skb); 3976 xdp.data_hard_start = skb->data - skb_headroom(skb);
3953 orig_data = xdp.data; 3977 orig_data = xdp.data;
3954 3978
3979 rxqueue = netif_get_rxqueue(skb);
3980 xdp.rxq = &rxqueue->xdp_rxq;
3981
3955 act = bpf_prog_run_xdp(xdp_prog, &xdp); 3982 act = bpf_prog_run_xdp(xdp_prog, &xdp);
3956 3983
3957 off = xdp.data - orig_data; 3984 off = xdp.data - orig_data;
@@ -7589,12 +7616,12 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
7589} 7616}
7590EXPORT_SYMBOL(netif_stacked_transfer_operstate); 7617EXPORT_SYMBOL(netif_stacked_transfer_operstate);
7591 7618
7592#ifdef CONFIG_SYSFS
7593static int netif_alloc_rx_queues(struct net_device *dev) 7619static int netif_alloc_rx_queues(struct net_device *dev)
7594{ 7620{
7595 unsigned int i, count = dev->num_rx_queues; 7621 unsigned int i, count = dev->num_rx_queues;
7596 struct netdev_rx_queue *rx; 7622 struct netdev_rx_queue *rx;
7597 size_t sz = count * sizeof(*rx); 7623 size_t sz = count * sizeof(*rx);
7624 int err = 0;
7598 7625
7599 BUG_ON(count < 1); 7626 BUG_ON(count < 1);
7600 7627
@@ -7604,11 +7631,39 @@ static int netif_alloc_rx_queues(struct net_device *dev)
7604 7631
7605 dev->_rx = rx; 7632 dev->_rx = rx;
7606 7633
7607 for (i = 0; i < count; i++) 7634 for (i = 0; i < count; i++) {
7608 rx[i].dev = dev; 7635 rx[i].dev = dev;
7636
7637 /* XDP RX-queue setup */
7638 err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
7639 if (err < 0)
7640 goto err_rxq_info;
7641 }
7609 return 0; 7642 return 0;
7643
7644err_rxq_info:
7645 /* Rollback successful reg's and free other resources */
7646 while (i--)
7647 xdp_rxq_info_unreg(&rx[i].xdp_rxq);
7648 kfree(dev->_rx);
7649 dev->_rx = NULL;
7650 return err;
7651}
7652
7653static void netif_free_rx_queues(struct net_device *dev)
7654{
7655 unsigned int i, count = dev->num_rx_queues;
7656 struct netdev_rx_queue *rx;
7657
7658 /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */
7659 if (!dev->_rx)
7660 return;
7661
7662 rx = dev->_rx;
7663
7664 for (i = 0; i < count; i++)
7665 xdp_rxq_info_unreg(&rx[i].xdp_rxq);
7610} 7666}
7611#endif
7612 7667
7613static void netdev_init_one_queue(struct net_device *dev, 7668static void netdev_init_one_queue(struct net_device *dev,
7614 struct netdev_queue *queue, void *_unused) 7669 struct netdev_queue *queue, void *_unused)
@@ -8169,12 +8224,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
8169 return NULL; 8224 return NULL;
8170 } 8225 }
8171 8226
8172#ifdef CONFIG_SYSFS
8173 if (rxqs < 1) { 8227 if (rxqs < 1) {
8174 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n"); 8228 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
8175 return NULL; 8229 return NULL;
8176 } 8230 }
8177#endif
8178 8231
8179 alloc_size = sizeof(struct net_device); 8232 alloc_size = sizeof(struct net_device);
8180 if (sizeof_priv) { 8233 if (sizeof_priv) {
@@ -8231,12 +8284,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
8231 if (netif_alloc_netdev_queues(dev)) 8284 if (netif_alloc_netdev_queues(dev))
8232 goto free_all; 8285 goto free_all;
8233 8286
8234#ifdef CONFIG_SYSFS
8235 dev->num_rx_queues = rxqs; 8287 dev->num_rx_queues = rxqs;
8236 dev->real_num_rx_queues = rxqs; 8288 dev->real_num_rx_queues = rxqs;
8237 if (netif_alloc_rx_queues(dev)) 8289 if (netif_alloc_rx_queues(dev))
8238 goto free_all; 8290 goto free_all;
8239#endif
8240 8291
8241 strcpy(dev->name, name); 8292 strcpy(dev->name, name);
8242 dev->name_assign_type = name_assign_type; 8293 dev->name_assign_type = name_assign_type;
@@ -8275,9 +8326,7 @@ void free_netdev(struct net_device *dev)
8275 8326
8276 might_sleep(); 8327 might_sleep();
8277 netif_free_tx_queues(dev); 8328 netif_free_tx_queues(dev);
8278#ifdef CONFIG_SYSFS 8329 netif_free_rx_queues(dev);
8279 kvfree(dev->_rx);
8280#endif
8281 8330
8282 kfree(rcu_dereference_protected(dev->ingress_queue, 1)); 8331 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
8283 8332
diff --git a/net/core/filter.c b/net/core/filter.c
index 130b842c3a15..acdb94c0e97f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4304,6 +4304,25 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4304 si->dst_reg, si->src_reg, 4304 si->dst_reg, si->src_reg,
4305 offsetof(struct xdp_buff, data_end)); 4305 offsetof(struct xdp_buff, data_end));
4306 break; 4306 break;
4307 case offsetof(struct xdp_md, ingress_ifindex):
4308 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4309 si->dst_reg, si->src_reg,
4310 offsetof(struct xdp_buff, rxq));
4311 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
4312 si->dst_reg, si->dst_reg,
4313 offsetof(struct xdp_rxq_info, dev));
4314 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4315 bpf_target_off(struct net_device,
4316 ifindex, 4, target_size));
4317 break;
4318 case offsetof(struct xdp_md, rx_queue_index):
4319 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4320 si->dst_reg, si->src_reg,
4321 offsetof(struct xdp_buff, rxq));
4322 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4323 bpf_target_off(struct xdp_rxq_info,
4324 queue_index, 4, target_size));
4325 break;
4307 } 4326 }
4308 4327
4309 return insn - insn_buf; 4328 return insn - insn_buf;
diff --git a/net/core/xdp.c b/net/core/xdp.c
new file mode 100644
index 000000000000..097a0f74e004
--- /dev/null
+++ b/net/core/xdp.c
@@ -0,0 +1,73 @@
1/* net/core/xdp.c
2 *
3 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
4 * Released under terms in GPL version 2. See COPYING.
5 */
6#include <linux/types.h>
7#include <linux/mm.h>
8
9#include <net/xdp.h>
10
11#define REG_STATE_NEW 0x0
12#define REG_STATE_REGISTERED 0x1
13#define REG_STATE_UNREGISTERED 0x2
14#define REG_STATE_UNUSED 0x3
15
16void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
17{
18 /* Simplify driver cleanup code paths, allow unreg "unused" */
19 if (xdp_rxq->reg_state == REG_STATE_UNUSED)
20 return;
21
22 WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
23
24 xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
25 xdp_rxq->dev = NULL;
26}
27EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
28
29static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
30{
31 memset(xdp_rxq, 0, sizeof(*xdp_rxq));
32}
33
34/* Returns 0 on success, negative on failure */
35int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
36 struct net_device *dev, u32 queue_index)
37{
38 if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
39 WARN(1, "Driver promised not to register this");
40 return -EINVAL;
41 }
42
43 if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
44 WARN(1, "Missing unregister, handled but fix driver");
45 xdp_rxq_info_unreg(xdp_rxq);
46 }
47
48 if (!dev) {
49 WARN(1, "Missing net_device from driver");
50 return -ENODEV;
51 }
52
53 /* State either UNREGISTERED or NEW */
54 xdp_rxq_info_init(xdp_rxq);
55 xdp_rxq->dev = dev;
56 xdp_rxq->queue_index = queue_index;
57
58 xdp_rxq->reg_state = REG_STATE_REGISTERED;
59 return 0;
60}
61EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
62
63void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
64{
65 xdp_rxq->reg_state = REG_STATE_UNUSED;
66}
67EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
68
69bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
70{
71 return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
72}
73EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4fb944a7ecf8..3ff7a05bea9a 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -41,6 +41,7 @@ hostprogs-y += xdp_redirect
41hostprogs-y += xdp_redirect_map 41hostprogs-y += xdp_redirect_map
42hostprogs-y += xdp_redirect_cpu 42hostprogs-y += xdp_redirect_cpu
43hostprogs-y += xdp_monitor 43hostprogs-y += xdp_monitor
44hostprogs-y += xdp_rxq_info
44hostprogs-y += syscall_tp 45hostprogs-y += syscall_tp
45 46
46# Libbpf dependencies 47# Libbpf dependencies
@@ -90,6 +91,7 @@ xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
90xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o 91xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
91xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o 92xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
92xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o 93xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
94xdp_rxq_info-objs := bpf_load.o $(LIBBPF) xdp_rxq_info_user.o
93syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o 95syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
94 96
95# Tell kbuild to always build the programs 97# Tell kbuild to always build the programs
@@ -139,6 +141,7 @@ always += xdp_redirect_kern.o
139always += xdp_redirect_map_kern.o 141always += xdp_redirect_map_kern.o
140always += xdp_redirect_cpu_kern.o 142always += xdp_redirect_cpu_kern.o
141always += xdp_monitor_kern.o 143always += xdp_monitor_kern.o
144always += xdp_rxq_info_kern.o
142always += syscall_tp_kern.o 145always += syscall_tp_kern.o
143 146
144HOSTCFLAGS += -I$(objtree)/usr/include 147HOSTCFLAGS += -I$(objtree)/usr/include
@@ -182,6 +185,7 @@ HOSTLOADLIBES_xdp_redirect += -lelf
182HOSTLOADLIBES_xdp_redirect_map += -lelf 185HOSTLOADLIBES_xdp_redirect_map += -lelf
183HOSTLOADLIBES_xdp_redirect_cpu += -lelf 186HOSTLOADLIBES_xdp_redirect_cpu += -lelf
184HOSTLOADLIBES_xdp_monitor += -lelf 187HOSTLOADLIBES_xdp_monitor += -lelf
188HOSTLOADLIBES_xdp_rxq_info += -lelf
185HOSTLOADLIBES_syscall_tp += -lelf 189HOSTLOADLIBES_syscall_tp += -lelf
186 190
187# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: 191# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
diff --git a/samples/bpf/xdp_rxq_info_kern.c b/samples/bpf/xdp_rxq_info_kern.c
new file mode 100644
index 000000000000..3fd209291653
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_kern.c
@@ -0,0 +1,96 @@
1/* SPDX-License-Identifier: GPL-2.0
2 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
3 *
4 * Example of how to extract XDP RX-queue info
5 */
6#include <uapi/linux/bpf.h>
7#include "bpf_helpers.h"
8
9/* Config setup from userspace
10 *
11 * User-side sets up ifindex in config_map, to verify that
12 * ctx->ingress_ifindex is correct (against configured ifindex)
13 */
14struct config {
15 __u32 action;
16 int ifindex;
17};
18struct bpf_map_def SEC("maps") config_map = {
19 .type = BPF_MAP_TYPE_ARRAY,
20 .key_size = sizeof(int),
21 .value_size = sizeof(struct config),
22 .max_entries = 1,
23};
24
25/* Common stats data record (shared with userspace) */
26struct datarec {
27 __u64 processed;
28 __u64 issue;
29};
30
31struct bpf_map_def SEC("maps") stats_global_map = {
32 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
33 .key_size = sizeof(u32),
34 .value_size = sizeof(struct datarec),
35 .max_entries = 1,
36};
37
38#define MAX_RXQs 64
39
40/* Stats per rx_queue_index (per CPU) */
41struct bpf_map_def SEC("maps") rx_queue_index_map = {
42 .type = BPF_MAP_TYPE_PERCPU_ARRAY,
43 .key_size = sizeof(u32),
44 .value_size = sizeof(struct datarec),
45 .max_entries = MAX_RXQs + 1,
46};
47
48SEC("xdp_prog0")
49int xdp_prognum0(struct xdp_md *ctx)
50{
51 void *data_end = (void *)(long)ctx->data_end;
52 void *data = (void *)(long)ctx->data;
53 struct datarec *rec, *rxq_rec;
54 int ingress_ifindex;
55 struct config *config;
56 u32 key = 0;
57
58 /* Global stats record */
59 rec = bpf_map_lookup_elem(&stats_global_map, &key);
60 if (!rec)
61 return XDP_ABORTED;
62 rec->processed++;
63
64 /* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF
65 * instructions inside kernel to access xdp_rxq->dev->ifindex
66 */
67 ingress_ifindex = ctx->ingress_ifindex;
68
69 config = bpf_map_lookup_elem(&config_map, &key);
70 if (!config)
71 return XDP_ABORTED;
72
73 /* Simple test: check ctx provided ifindex is as expected */
74 if (ingress_ifindex != config->ifindex) {
75 /* count this error case */
76 rec->issue++;
77 return XDP_ABORTED;
78 }
79
80 /* Update stats per rx_queue_index. Handle if rx_queue_index
81 * is larger than stats map can contain info for.
82 */
83 key = ctx->rx_queue_index;
84 if (key >= MAX_RXQs)
85 key = MAX_RXQs;
86 rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key);
87 if (!rxq_rec)
88 return XDP_ABORTED;
89 rxq_rec->processed++;
90 if (key == MAX_RXQs)
91 rxq_rec->issue++;
92
93 return config->action;
94}
95
96char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
new file mode 100644
index 000000000000..32430e8b3a6a
--- /dev/null
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -0,0 +1,531 @@
1/* SPDX-License-Identifier: GPL-2.0
2 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
3 */
4static const char *__doc__ = " XDP RX-queue info extract example\n\n"
5 "Monitor how many packets per sec (pps) are received\n"
6 "per NIC RX queue index and which CPU processed the packet\n"
7 ;
8
9#include <errno.h>
10#include <signal.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <stdbool.h>
14#include <string.h>
15#include <unistd.h>
16#include <locale.h>
17#include <sys/resource.h>
18#include <getopt.h>
19#include <net/if.h>
20#include <time.h>
21
22#include <arpa/inet.h>
23#include <linux/if_link.h>
24
25#include "libbpf.h"
26#include "bpf_load.h"
27#include "bpf_util.h"
28
/* Target device, filled in by main() from --dev; ifindex stays -1 until then.
 * int_exit() reads these to detach the XDP program on SIGINT.
 */
29static int ifindex = -1;
30static char ifname_buf[IF_NAMESIZE];
31static char *ifname;
32
/* Flags passed to set_link_xdp_fd(); --skb-mode ORs in XDP_FLAGS_SKB_MODE */
33static __u32 xdp_flags;
34
35/* Exit return codes */
36#define EXIT_OK 0
37#define EXIT_FAIL 1
38#define EXIT_FAIL_OPTION 2
39#define EXIT_FAIL_XDP 3
40#define EXIT_FAIL_BPF 4
41#define EXIT_FAIL_MEM 5
42
/* Command line options for getopt_long(). The last field of each entry is
 * the value main() switches on; usage() prints it as the short option.
 */
43static const struct option long_options[] = {
44 {"help", no_argument, NULL, 'h' },
45 {"dev", required_argument, NULL, 'd' }, /* device name, resolved to ifindex */
46 {"skb-mode", no_argument, NULL, 'S' }, /* sets XDP_FLAGS_SKB_MODE */
47 {"sec", required_argument, NULL, 's' }, /* polling interval passed to sleep() */
48 {"no-separators", no_argument, NULL, 'z' }, /* skip the setlocale() call */
49 {"action", required_argument, NULL, 'a' }, /* XDP action name, see xdp_action_names */
50 {0, 0, NULL, 0 }
51};
52
53static void int_exit(int sig)
54{
55 fprintf(stderr,
56 "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
57 ifindex, ifname);
58 if (ifindex > -1)
59 set_link_xdp_fd(ifindex, -1, xdp_flags);
60 exit(EXIT_OK);
61}
62
/* Configuration record that main() stores at key 0 of map_fd[0] for the
 * XDP program.
 */
63struct config {
64 __u32 action; /* XDP action selected with --action (default XDP_PASS) */
65 int ifindex; /* ifindex of the device given with --dev */
66};
/* Number of entries in xdp_action_names (XDP_TX is the highest action listed) */
67#define XDP_ACTION_MAX (XDP_TX + 1)
/* Length of the longest name below ("XDP_ABORTED"), without the NUL */
68#define XDP_ACTION_MAX_STRLEN 11
/* Action value -> printable name, indexed by the XDP_* action constants */
69static const char *xdp_action_names[XDP_ACTION_MAX] = {
70 [XDP_ABORTED] = "XDP_ABORTED",
71 [XDP_DROP] = "XDP_DROP",
72 [XDP_PASS] = "XDP_PASS",
73 [XDP_TX] = "XDP_TX",
74};
75
76static const char *action2str(int action)
77{
78 if (action < XDP_ACTION_MAX)
79 return xdp_action_names[action];
80 return NULL;
81}
82
83static int parse_xdp_action(char *action_str)
84{
85 size_t maxlen;
86 __u64 action = -1;
87 int i;
88
89 for (i = 0; i < XDP_ACTION_MAX; i++) {
90 maxlen = XDP_ACTION_MAX_STRLEN;
91 if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) {
92 action = i;
93 break;
94 }
95 }
96 return action;
97}
98
99static void list_xdp_actions(void)
100{
101 int i;
102
103 printf("Available XDP --action <options>\n");
104 for (i = 0; i < XDP_ACTION_MAX; i++)
105 printf("\t%s\n", xdp_action_names[i]);
106 printf("\n");
107}
108
109static void usage(char *argv[])
110{
111 int i;
112
113 printf("\nDOCUMENTATION:\n%s\n", __doc__);
114 printf(" Usage: %s (options-see-below)\n", argv[0]);
115 printf(" Listing options:\n");
116 for (i = 0; long_options[i].name != 0; i++) {
117 printf(" --%-12s", long_options[i].name);
118 if (long_options[i].flag != NULL)
119 printf(" flag (internal value:%d)",
120 *long_options[i].flag);
121 else
122 printf(" short-option: -%c",
123 long_options[i].val);
124 printf("\n");
125 }
126 printf("\n");
127 list_xdp_actions();
128}
129
130#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
131static __u64 gettime(void)
132{
133 struct timespec t;
134 int res;
135
136 res = clock_gettime(CLOCK_MONOTONIC, &t);
137 if (res < 0) {
138 fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
139 exit(EXIT_FAIL);
140 }
141 return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
142}
143
144/* Common stats data record shared with _kern.c */
145struct datarec {
146 __u64 processed;
147 __u64 issue;
148};
/* One snapshot of a per-CPU map entry, filled in by map_collect_percpu() */
149struct record {
150 __u64 timestamp; /* gettime() value taken right after the map lookup */
151 struct datarec total; /* sum of the cpu[] entries */
152 struct datarec *cpu; /* array with one element per possible CPU */
153};
/* Everything gathered in one polling round */
154struct stats_record {
155 struct record stats; /* from stats_global_map */
156 struct record *rxq; /* array with one element per rx_queue_index_map entry */
157};
158
159static struct datarec *alloc_record_per_cpu(void)
160{
161 unsigned int nr_cpus = bpf_num_possible_cpus();
162 struct datarec *array;
163 size_t size;
164
165 size = sizeof(struct datarec) * nr_cpus;
166 array = malloc(size);
167 memset(array, 0, size);
168 if (!array) {
169 fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
170 exit(EXIT_FAIL_MEM);
171 }
172 return array;
173}
174
175static struct record *alloc_record_per_rxq(void)
176{
177 unsigned int nr_rxqs = map_data[2].def.max_entries;
178 struct record *array;
179 size_t size;
180
181 size = sizeof(struct record) * nr_rxqs;
182 array = malloc(size);
183 memset(array, 0, size);
184 if (!array) {
185 fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
186 exit(EXIT_FAIL_MEM);
187 }
188 return array;
189}
190
191static struct stats_record *alloc_stats_record(void)
192{
193 unsigned int nr_rxqs = map_data[2].def.max_entries;
194 struct stats_record *rec;
195 int i;
196
197 rec = malloc(sizeof(*rec));
198 memset(rec, 0, sizeof(*rec));
199 if (!rec) {
200 fprintf(stderr, "Mem alloc error\n");
201 exit(EXIT_FAIL_MEM);
202 }
203 rec->rxq = alloc_record_per_rxq();
204 for (i = 0; i < nr_rxqs; i++)
205 rec->rxq[i].cpu = alloc_record_per_cpu();
206
207 rec->stats.cpu = alloc_record_per_cpu();
208 return rec;
209}
210
211static void free_stats_record(struct stats_record *r)
212{
213 unsigned int nr_rxqs = map_data[2].def.max_entries;
214 int i;
215
216 for (i = 0; i < nr_rxqs; i++)
217 free(r->rxq[i].cpu);
218
219 free(r->rxq);
220 free(r->stats.cpu);
221 free(r);
222}
223
224static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
225{
226 /* For percpu maps, userspace gets a value per possible CPU */
227 unsigned int nr_cpus = bpf_num_possible_cpus();
228 struct datarec values[nr_cpus];
229 __u64 sum_processed = 0;
230 __u64 sum_issue = 0;
231 int i;
232
233 if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
234 fprintf(stderr,
235 "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
236 return false;
237 }
238 /* Get time as close as possible to reading map contents */
239 rec->timestamp = gettime();
240
241 /* Record and sum values from each CPU */
242 for (i = 0; i < nr_cpus; i++) {
243 rec->cpu[i].processed = values[i].processed;
244 sum_processed += values[i].processed;
245 rec->cpu[i].issue = values[i].issue;
246 sum_issue += values[i].issue;
247 }
248 rec->total.processed = sum_processed;
249 rec->total.issue = sum_issue;
250 return true;
251}
252
253static void stats_collect(struct stats_record *rec)
254{
255 int fd, i, max_rxqs;
256
257 fd = map_data[1].fd; /* map: stats_global_map */
258 map_collect_percpu(fd, 0, &rec->stats);
259
260 fd = map_data[2].fd; /* map: rx_queue_index_map */
261 max_rxqs = map_data[2].def.max_entries;
262 for (i = 0; i < max_rxqs; i++)
263 map_collect_percpu(fd, i, &rec->rxq[i]);
264}
265
266static double calc_period(struct record *r, struct record *p)
267{
268 double period_ = 0;
269 __u64 period = 0;
270
271 period = r->timestamp - p->timestamp;
272 if (period > 0)
273 period_ = ((double) period / NANOSEC_PER_SEC);
274
275 return period_;
276}
277
278static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
279{
280 __u64 packets = 0;
281 __u64 pps = 0;
282
283 if (period_ > 0) {
284 packets = r->processed - p->processed;
285 pps = packets / period_;
286 }
287 return pps;
288}
289
290static __u64 calc_errs_pps(struct datarec *r,
291 struct datarec *p, double period_)
292{
293 __u64 packets = 0;
294 __u64 pps = 0;
295
296 if (period_ > 0) {
297 packets = r->issue - p->issue;
298 pps = packets / period_;
299 }
300 return pps;
301}
302
303static void stats_print(struct stats_record *stats_rec,
304 struct stats_record *stats_prev,
305 int action)
306{
307 unsigned int nr_cpus = bpf_num_possible_cpus();
308 unsigned int nr_rxqs = map_data[2].def.max_entries;
309 double pps = 0, err = 0;
310 struct record *rec, *prev;
311 double t;
312 int rxq;
313 int i;
314
315 /* Header */
316 printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s\n",
317 ifname, ifindex, action2str(action));
318
319 /* stats_global_map */
320 {
321 char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n";
322 char *fm2_rx = "%-15s %-7s %'-11.0f\n";
323 char *errstr = "";
324
325 printf("%-15s %-7s %-11s %-11s\n",
326 "XDP stats", "CPU", "pps", "issue-pps");
327
328 rec = &stats_rec->stats;
329 prev = &stats_prev->stats;
330 t = calc_period(rec, prev);
331 for (i = 0; i < nr_cpus; i++) {
332 struct datarec *r = &rec->cpu[i];
333 struct datarec *p = &prev->cpu[i];
334
335 pps = calc_pps (r, p, t);
336 err = calc_errs_pps(r, p, t);
337 if (err > 0)
338 errstr = "invalid-ifindex";
339 if (pps > 0)
340 printf(fmt_rx, "XDP-RX CPU",
341 i, pps, err, errstr);
342 }
343 pps = calc_pps (&rec->total, &prev->total, t);
344 err = calc_errs_pps(&rec->total, &prev->total, t);
345 printf(fm2_rx, "XDP-RX CPU", "total", pps, err);
346 }
347
348 /* rx_queue_index_map */
349 printf("\n%-15s %-7s %-11s %-11s\n",
350 "RXQ stats", "RXQ:CPU", "pps", "issue-pps");
351
352 for (rxq = 0; rxq < nr_rxqs; rxq++) {
353 char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n";
354 char *fm2_rx = "%-15s %3d:%-3s %'-11.0f\n";
355 char *errstr = "";
356 int rxq_ = rxq;
357
358 /* Last RXQ in map catch overflows */
359 if (rxq_ == nr_rxqs - 1)
360 rxq_ = -1;
361
362 rec = &stats_rec->rxq[rxq];
363 prev = &stats_prev->rxq[rxq];
364 t = calc_period(rec, prev);
365 for (i = 0; i < nr_cpus; i++) {
366 struct datarec *r = &rec->cpu[i];
367 struct datarec *p = &prev->cpu[i];
368
369 pps = calc_pps (r, p, t);
370 err = calc_errs_pps(r, p, t);
371 if (err > 0) {
372 if (rxq_ == -1)
373 errstr = "map-overflow-RXQ";
374 else
375 errstr = "err";
376 }
377 if (pps > 0)
378 printf(fmt_rx, "rx_queue_index",
379 rxq_, i, pps, err, errstr);
380 }
381 pps = calc_pps (&rec->total, &prev->total, t);
382 err = calc_errs_pps(&rec->total, &prev->total, t);
383 if (pps || err)
384 printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err);
385 }
386}
387
388
/* Exchange the two stats_record pointers so the caller can reuse both
 * buffers without copying their contents.
 */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
	struct stats_record *first = *a;
	struct stats_record *second = *b;

	*a = second;
	*b = first;
}
398
/* Poll the stats maps forever: every @interval seconds take a new snapshot
 * and print the rates relative to the previous one. @action is only passed
 * on to stats_print() for the header.
 */
static void stats_poll(int interval, int action)
{
	struct stats_record *cur = alloc_stats_record();
	struct stats_record *old = alloc_stats_record();

	/* Initial snapshot; it becomes the "previous" one in the first round */
	stats_collect(cur);

	for (;;) {
		swap(&old, &cur);
		stats_collect(cur);
		stats_print(cur, old, action);
		sleep(interval);
	}

	/* Not reached: the loop above only ends when the process exits */
	free_stats_record(cur);
	free_stats_record(old);
}
417
418
419int main(int argc, char **argv)
420{
421 struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
422 bool use_separators = true;
423 struct config cfg = { 0 };
424 char filename[256];
425 int longindex = 0;
426 int interval = 2;
427 __u32 key = 0;
428 int opt, err;
429
430 char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 };
431 int action = XDP_PASS; /* Default action */
432 char *action_str = NULL;
433
434 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
435
436 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
437 perror("setrlimit(RLIMIT_MEMLOCK)");
438 return 1;
439 }
440
441 if (load_bpf_file(filename)) {
442 fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
443 return EXIT_FAIL;
444 }
445
446 if (!prog_fd[0]) {
447 fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
448 return EXIT_FAIL;
449 }
450
451 /* Parse commands line args */
452 while ((opt = getopt_long(argc, argv, "hSd:",
453 long_options, &longindex)) != -1) {
454 switch (opt) {
455 case 'd':
456 if (strlen(optarg) >= IF_NAMESIZE) {
457 fprintf(stderr, "ERR: --dev name too long\n");
458 goto error;
459 }
460 ifname = (char *)&ifname_buf;
461 strncpy(ifname, optarg, IF_NAMESIZE);
462 ifindex = if_nametoindex(ifname);
463 if (ifindex == 0) {
464 fprintf(stderr,
465 "ERR: --dev name unknown err(%d):%s\n",
466 errno, strerror(errno));
467 goto error;
468 }
469 break;
470 case 's':
471 interval = atoi(optarg);
472 break;
473 case 'S':
474 xdp_flags |= XDP_FLAGS_SKB_MODE;
475 break;
476 case 'z':
477 use_separators = false;
478 break;
479 case 'a':
480 action_str = (char *)&action_str_buf;
481 strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN);
482 break;
483 case 'h':
484 error:
485 default:
486 usage(argv);
487 return EXIT_FAIL_OPTION;
488 }
489 }
490 /* Required option */
491 if (ifindex == -1) {
492 fprintf(stderr, "ERR: required option --dev missing\n");
493 usage(argv);
494 return EXIT_FAIL_OPTION;
495 }
496 cfg.ifindex = ifindex;
497
498 /* Parse action string */
499 if (action_str) {
500 action = parse_xdp_action(action_str);
501 if (action < 0) {
502 fprintf(stderr, "ERR: Invalid XDP --action: %s\n",
503 action_str);
504 list_xdp_actions();
505 return EXIT_FAIL_OPTION;
506 }
507 }
508 cfg.action = action;
509
510 /* Trick to pretty printf with thousands separators use %' */
511 if (use_separators)
512 setlocale(LC_NUMERIC, "en_US");
513
514 /* User-side setup ifindex in config_map */
515 err = bpf_map_update_elem(map_fd[0], &key, &cfg, 0);
516 if (err) {
517 fprintf(stderr, "Store config failed (err:%d)\n", err);
518 exit(EXIT_FAIL_BPF);
519 }
520
521 /* Remove XDP program when program is interrupted */
522 signal(SIGINT, int_exit);
523
524 if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
525 fprintf(stderr, "link set xdp fd failed\n");
526 return EXIT_FAIL_XDP;
527 }
528
529 stats_poll(interval, action);
530 return EXIT_OK;
531}
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 07a6697466ef..c8ec0ae16bf0 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -9,6 +9,35 @@ MAKE = make
9CFLAGS += -Wall -O2 9CFLAGS += -Wall -O2
10CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include 10CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
11 11
12ifeq ($(srctree),)
13srctree := $(patsubst %/,%,$(dir $(CURDIR)))
14srctree := $(patsubst %/,%,$(dir $(srctree)))
15endif
16
17FEATURE_USER = .bpf
18FEATURE_TESTS = libbfd disassembler-four-args
19FEATURE_DISPLAY = libbfd disassembler-four-args
20
21check_feat := 1
22NON_CHECK_FEAT_TARGETS := clean bpftool_clean
23ifdef MAKECMDGOALS
24ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
25 check_feat := 0
26endif
27endif
28
29ifeq ($(check_feat),1)
30ifeq ($(FEATURES_DUMP),)
31include $(srctree)/tools/build/Makefile.feature
32else
33include $(FEATURES_DUMP)
34endif
35endif
36
37ifeq ($(feature-disassembler-four-args), 1)
38CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
39endif
40
12%.yacc.c: %.y 41%.yacc.c: %.y
13 $(YACC) -o $@ -d $< 42 $(YACC) -o $@ -d $<
14 43
diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index 75bf526a0168..30044bc4f389 100644
--- a/tools/bpf/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -72,7 +72,14 @@ static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
72 72
73 disassemble_init_for_target(&info); 73 disassemble_init_for_target(&info);
74 74
75#ifdef DISASM_FOUR_ARGS_SIGNATURE
76 disassemble = disassembler(info.arch,
77 bfd_big_endian(bfdf),
78 info.mach,
79 bfdf);
80#else
75 disassemble = disassembler(bfdf); 81 disassemble = disassembler(bfdf);
82#endif
76 assert(disassemble); 83 assert(disassemble);
77 84
78 do { 85 do {
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 45c71b1f682b..2fe2a1bdbe3e 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -15,12 +15,12 @@ SYNOPSIS
15 *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } 15 *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
16 16
17 *COMMANDS* := 17 *COMMANDS* :=
18 { **list** | **attach** | **detach** | **help** } 18 { **show** | **list** | **attach** | **detach** | **help** }
19 19
20MAP COMMANDS 20MAP COMMANDS
21============= 21=============
22 22
23| **bpftool** **cgroup list** *CGROUP* 23| **bpftool** **cgroup { show | list }** *CGROUP*
24| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*] 24| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
25| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG* 25| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
26| **bpftool** **cgroup help** 26| **bpftool** **cgroup help**
@@ -31,7 +31,7 @@ MAP COMMANDS
31 31
32DESCRIPTION 32DESCRIPTION
33=========== 33===========
34 **bpftool cgroup list** *CGROUP* 34 **bpftool cgroup { show | list }** *CGROUP*
35 List all programs attached to the cgroup *CGROUP*. 35 List all programs attached to the cgroup *CGROUP*.
36 36
37 Output will start with program ID followed by attach type, 37 Output will start with program ID followed by attach type,
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 421cabc417e6..0ab32b312aec 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -15,13 +15,13 @@ SYNOPSIS
15 *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } 15 *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
16 16
17 *COMMANDS* := 17 *COMMANDS* :=
18 { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** 18 { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
19 | **pin** | **help** } 19 | **pin** | **help** }
20 20
21MAP COMMANDS 21MAP COMMANDS
22============= 22=============
23 23
24| **bpftool** **map show** [*MAP*] 24| **bpftool** **map { show | list }** [*MAP*]
25| **bpftool** **map dump** *MAP* 25| **bpftool** **map dump** *MAP*
26| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*] 26| **bpftool** **map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
27| **bpftool** **map lookup** *MAP* **key** *BYTES* 27| **bpftool** **map lookup** *MAP* **key** *BYTES*
@@ -36,7 +36,7 @@ MAP COMMANDS
36 36
37DESCRIPTION 37DESCRIPTION
38=========== 38===========
39 **bpftool map show** [*MAP*] 39 **bpftool map { show | list }** [*MAP*]
40 Show information about loaded maps. If *MAP* is specified 40 Show information about loaded maps. If *MAP* is specified
41 show information only about given map, otherwise list all 41 show information only about given map, otherwise list all
42 maps currently loaded on the system. 42 maps currently loaded on the system.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 81c97c0e9b67..e4ceee7f2dff 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -15,12 +15,12 @@ SYNOPSIS
15 *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } 15 *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
16 16
17 *COMMANDS* := 17 *COMMANDS* :=
18 { **show** | **dump xlated** | **dump jited** | **pin** | **load** | **help** } 18 { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
19 19
20MAP COMMANDS 20MAP COMMANDS
21============= 21=============
22 22
23| **bpftool** **prog show** [*PROG*] 23| **bpftool** **prog { show | list }** [*PROG*]
24| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}] 24| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes**}]
25| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}] 25| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
26| **bpftool** **prog pin** *PROG* *FILE* 26| **bpftool** **prog pin** *PROG* *FILE*
@@ -31,7 +31,7 @@ MAP COMMANDS
31 31
32DESCRIPTION 32DESCRIPTION
33=========== 33===========
34 **bpftool prog show** [*PROG*] 34 **bpftool prog { show | list }** [*PROG*]
35 Show information about loaded programs. If *PROG* is 35 Show information about loaded programs. If *PROG* is
36 specified show information only about given program, otherwise 36 specified show information only about given program, otherwise
37 list all programs currently loaded on the system. 37 list all programs currently loaded on the system.
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 6732a5a617e4..20689a321ffe 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -22,13 +22,13 @@ SYNOPSIS
22 | { **-j** | **--json** } [{ **-p** | **--pretty** }] } 22 | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
23 23
24 *MAP-COMMANDS* := 24 *MAP-COMMANDS* :=
25 { **show** | **dump** | **update** | **lookup** | **getnext** | **delete** 25 { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
26 | **pin** | **help** } 26 | **pin** | **help** }
27 27
28 *PROG-COMMANDS* := { **show** | **dump jited** | **dump xlated** | **pin** 28 *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
29 | **load** | **help** } 29 | **load** | **help** }
30 30
31 *CGROUP-COMMANDS* := { **list** | **attach** | **detach** | **help** } 31 *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
32 32
33DESCRIPTION 33DESCRIPTION
34=========== 34===========
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 3f17ad317512..2237bc43f71c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -23,6 +23,8 @@ endif
23 23
24LIBBPF = $(BPF_PATH)libbpf.a 24LIBBPF = $(BPF_PATH)libbpf.a
25 25
26BPFTOOL_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
27
26$(LIBBPF): FORCE 28$(LIBBPF): FORCE
27 $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) 29 $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
28 30
@@ -38,11 +40,36 @@ CC = gcc
38CFLAGS += -O2 40CFLAGS += -O2
39CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow 41CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
40CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ 42CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
43CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
41LIBS = -lelf -lbfd -lopcodes $(LIBBPF) 44LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
42 45
43INSTALL ?= install 46INSTALL ?= install
44RM ?= rm -f 47RM ?= rm -f
45 48
49FEATURE_USER = .bpftool
50FEATURE_TESTS = libbfd disassembler-four-args
51FEATURE_DISPLAY = libbfd disassembler-four-args
52
53check_feat := 1
54NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
55ifdef MAKECMDGOALS
56ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
57 check_feat := 0
58endif
59endif
60
61ifeq ($(check_feat),1)
62ifeq ($(FEATURES_DUMP),)
63include $(srctree)/tools/build/Makefile.feature
64else
65include $(FEATURES_DUMP)
66endif
67endif
68
69ifeq ($(feature-disassembler-four-args), 1)
70CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
71endif
72
46include $(wildcard *.d) 73include $(wildcard *.d)
47 74
48all: $(OUTPUT)bpftool 75all: $(OUTPUT)bpftool
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 7febee05c8e7..0137866bb8f6 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -197,7 +197,7 @@ _bpftool()
197 197
198 local PROG_TYPE='id pinned tag' 198 local PROG_TYPE='id pinned tag'
199 case $command in 199 case $command in
200 show) 200 show|list)
201 [[ $prev != "$command" ]] && return 0 201 [[ $prev != "$command" ]] && return 0
202 COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) ) 202 COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
203 return 0 203 return 0
@@ -232,7 +232,7 @@ _bpftool()
232 ;; 232 ;;
233 *) 233 *)
234 [[ $prev == $object ]] && \ 234 [[ $prev == $object ]] && \
235 COMPREPLY=( $( compgen -W 'dump help pin show' -- \ 235 COMPREPLY=( $( compgen -W 'dump help pin show list' -- \
236 "$cur" ) ) 236 "$cur" ) )
237 ;; 237 ;;
238 esac 238 esac
@@ -240,7 +240,7 @@ _bpftool()
240 map) 240 map)
241 local MAP_TYPE='id pinned' 241 local MAP_TYPE='id pinned'
242 case $command in 242 case $command in
243 show|dump) 243 show|list|dump)
244 case $prev in 244 case $prev in
245 $command) 245 $command)
246 COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) ) 246 COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
@@ -343,7 +343,7 @@ _bpftool()
343 *) 343 *)
344 [[ $prev == $object ]] && \ 344 [[ $prev == $object ]] && \
345 COMPREPLY=( $( compgen -W 'delete dump getnext help \ 345 COMPREPLY=( $( compgen -W 'delete dump getnext help \
346 lookup pin show update' -- "$cur" ) ) 346 lookup pin show list update' -- "$cur" ) )
347 ;; 347 ;;
348 esac 348 esac
349 ;; 349 ;;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 34ca303d72bc..cae32a61cb18 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -41,7 +41,7 @@ static enum bpf_attach_type parse_attach_type(const char *str)
41 return __MAX_BPF_ATTACH_TYPE; 41 return __MAX_BPF_ATTACH_TYPE;
42} 42}
43 43
44static int list_bpf_prog(int id, const char *attach_type_str, 44static int show_bpf_prog(int id, const char *attach_type_str,
45 const char *attach_flags_str) 45 const char *attach_flags_str)
46{ 46{
47 struct bpf_prog_info info = {}; 47 struct bpf_prog_info info = {};
@@ -77,7 +77,7 @@ static int list_bpf_prog(int id, const char *attach_type_str,
77 return 0; 77 return 0;
78} 78}
79 79
80static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) 80static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
81{ 81{
82 __u32 prog_ids[1024] = {0}; 82 __u32 prog_ids[1024] = {0};
83 char *attach_flags_str; 83 char *attach_flags_str;
@@ -111,29 +111,29 @@ static int list_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
111 } 111 }
112 112
113 for (iter = 0; iter < prog_cnt; iter++) 113 for (iter = 0; iter < prog_cnt; iter++)
114 list_bpf_prog(prog_ids[iter], attach_type_strings[type], 114 show_bpf_prog(prog_ids[iter], attach_type_strings[type],
115 attach_flags_str); 115 attach_flags_str);
116 116
117 return 0; 117 return 0;
118} 118}
119 119
120static int do_list(int argc, char **argv) 120static int do_show(int argc, char **argv)
121{ 121{
122 enum bpf_attach_type type; 122 enum bpf_attach_type type;
123 int cgroup_fd; 123 int cgroup_fd;
124 int ret = -1; 124 int ret = -1;
125 125
126 if (argc < 1) { 126 if (argc < 1) {
127 p_err("too few parameters for cgroup list\n"); 127 p_err("too few parameters for cgroup show");
128 goto exit; 128 goto exit;
129 } else if (argc > 1) { 129 } else if (argc > 1) {
130 p_err("too many parameters for cgroup list\n"); 130 p_err("too many parameters for cgroup show");
131 goto exit; 131 goto exit;
132 } 132 }
133 133
134 cgroup_fd = open(argv[0], O_RDONLY); 134 cgroup_fd = open(argv[0], O_RDONLY);
135 if (cgroup_fd < 0) { 135 if (cgroup_fd < 0) {
136 p_err("can't open cgroup %s\n", argv[1]); 136 p_err("can't open cgroup %s", argv[1]);
137 goto exit; 137 goto exit;
138 } 138 }
139 139
@@ -147,10 +147,10 @@ static int do_list(int argc, char **argv)
147 /* 147 /*
148 * Not all attach types may be supported, so it's expected, 148 * Not all attach types may be supported, so it's expected,
149 * that some requests will fail. 149 * that some requests will fail.
150 * If we were able to get the list for at least one 150 * If we were able to get the show for at least one
151 * attach type, let's return 0. 151 * attach type, let's return 0.
152 */ 152 */
153 if (list_attached_bpf_progs(cgroup_fd, type) == 0) 153 if (show_attached_bpf_progs(cgroup_fd, type) == 0)
154 ret = 0; 154 ret = 0;
155 } 155 }
156 156
@@ -171,19 +171,19 @@ static int do_attach(int argc, char **argv)
171 int i; 171 int i;
172 172
173 if (argc < 4) { 173 if (argc < 4) {
174 p_err("too few parameters for cgroup attach\n"); 174 p_err("too few parameters for cgroup attach");
175 goto exit; 175 goto exit;
176 } 176 }
177 177
178 cgroup_fd = open(argv[0], O_RDONLY); 178 cgroup_fd = open(argv[0], O_RDONLY);
179 if (cgroup_fd < 0) { 179 if (cgroup_fd < 0) {
180 p_err("can't open cgroup %s\n", argv[1]); 180 p_err("can't open cgroup %s", argv[1]);
181 goto exit; 181 goto exit;
182 } 182 }
183 183
184 attach_type = parse_attach_type(argv[1]); 184 attach_type = parse_attach_type(argv[1]);
185 if (attach_type == __MAX_BPF_ATTACH_TYPE) { 185 if (attach_type == __MAX_BPF_ATTACH_TYPE) {
186 p_err("invalid attach type\n"); 186 p_err("invalid attach type");
187 goto exit_cgroup; 187 goto exit_cgroup;
188 } 188 }
189 189
@@ -199,7 +199,7 @@ static int do_attach(int argc, char **argv)
199 } else if (is_prefix(argv[i], "override")) { 199 } else if (is_prefix(argv[i], "override")) {
200 attach_flags |= BPF_F_ALLOW_OVERRIDE; 200 attach_flags |= BPF_F_ALLOW_OVERRIDE;
201 } else { 201 } else {
202 p_err("unknown option: %s\n", argv[i]); 202 p_err("unknown option: %s", argv[i]);
203 goto exit_cgroup; 203 goto exit_cgroup;
204 } 204 }
205 } 205 }
@@ -229,13 +229,13 @@ static int do_detach(int argc, char **argv)
229 int ret = -1; 229 int ret = -1;
230 230
231 if (argc < 4) { 231 if (argc < 4) {
232 p_err("too few parameters for cgroup detach\n"); 232 p_err("too few parameters for cgroup detach");
233 goto exit; 233 goto exit;
234 } 234 }
235 235
236 cgroup_fd = open(argv[0], O_RDONLY); 236 cgroup_fd = open(argv[0], O_RDONLY);
237 if (cgroup_fd < 0) { 237 if (cgroup_fd < 0) {
238 p_err("can't open cgroup %s\n", argv[1]); 238 p_err("can't open cgroup %s", argv[1]);
239 goto exit; 239 goto exit;
240 } 240 }
241 241
@@ -277,7 +277,7 @@ static int do_help(int argc, char **argv)
277 } 277 }
278 278
279 fprintf(stderr, 279 fprintf(stderr,
280 "Usage: %s %s list CGROUP\n" 280 "Usage: %s %s { show | list } CGROUP\n"
281 " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n" 281 " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
282 " %s %s detach CGROUP ATTACH_TYPE PROG\n" 282 " %s %s detach CGROUP ATTACH_TYPE PROG\n"
283 " %s %s help\n" 283 " %s %s help\n"
@@ -294,7 +294,8 @@ static int do_help(int argc, char **argv)
294} 294}
295 295
296static const struct cmd cmds[] = { 296static const struct cmd cmds[] = {
297 { "list", do_list }, 297 { "show", do_show },
298 { "list", do_show },
298 { "attach", do_attach }, 299 { "attach", do_attach },
299 { "detach", do_detach }, 300 { "detach", do_detach },
300 { "help", do_help }, 301 { "help", do_help },
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b62c94e3997a..6601c95a9258 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -44,7 +44,9 @@
44#include <unistd.h> 44#include <unistd.h>
45#include <linux/limits.h> 45#include <linux/limits.h>
46#include <linux/magic.h> 46#include <linux/magic.h>
47#include <net/if.h>
47#include <sys/mount.h> 48#include <sys/mount.h>
49#include <sys/stat.h>
48#include <sys/types.h> 50#include <sys/types.h>
49#include <sys/vfs.h> 51#include <sys/vfs.h>
50 52
@@ -412,3 +414,53 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
412 free(obj); 414 free(obj);
413 } 415 }
414} 416}
417
418static char *
419ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
420{
421 struct stat st;
422 int err;
423
424 err = stat("/proc/self/ns/net", &st);
425 if (err) {
426 p_err("Can't stat /proc/self: %s", strerror(errno));
427 return NULL;
428 }
429
430 if (st.st_dev != ns_dev || st.st_ino != ns_ino)
431 return NULL;
432
433 return if_indextoname(ifindex, buf);
434}
435
436void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
437{
438 char name[IF_NAMESIZE];
439
440 if (!ifindex)
441 return;
442
443 printf(" dev ");
444 if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
445 printf("%s", name);
446 else
447 printf("ifindex %u ns_dev %llu ns_ino %llu",
448 ifindex, ns_dev, ns_inode);
449}
450
451void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
452{
453 char name[IF_NAMESIZE];
454
455 if (!ifindex)
456 return;
457
458 jsonw_name(json_wtr, "dev");
459 jsonw_start_object(json_wtr);
460 jsonw_uint_field(json_wtr, "ifindex", ifindex);
461 jsonw_uint_field(json_wtr, "ns_dev", ns_dev);
462 jsonw_uint_field(json_wtr, "ns_inode", ns_inode);
463 if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
464 jsonw_string_field(json_wtr, "ifname", name);
465 jsonw_end_object(json_wtr);
466}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index 1551d3918d4c..57d32e8a1391 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -107,7 +107,14 @@ void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
107 107
108 disassemble_init_for_target(&info); 108 disassemble_init_for_target(&info);
109 109
110#ifdef DISASM_FOUR_ARGS_SIGNATURE
111 disassemble = disassembler(info.arch,
112 bfd_big_endian(bfdf),
113 info.mach,
114 bfdf);
115#else
110 disassemble = disassembler(bfdf); 116 disassemble = disassembler(bfdf);
117#endif
111 assert(disassemble); 118 assert(disassemble);
112 119
113 if (json_output) 120 if (json_output)
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index ecd53ccf1239..3a0396d87c42 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -38,7 +38,6 @@
38#include <errno.h> 38#include <errno.h>
39#include <getopt.h> 39#include <getopt.h>
40#include <linux/bpf.h> 40#include <linux/bpf.h>
41#include <linux/version.h>
42#include <stdio.h> 41#include <stdio.h>
43#include <stdlib.h> 42#include <stdlib.h>
44#include <string.h> 43#include <string.h>
@@ -95,21 +94,13 @@ static int do_help(int argc, char **argv)
95 94
96static int do_version(int argc, char **argv) 95static int do_version(int argc, char **argv)
97{ 96{
98 unsigned int version[3];
99
100 version[0] = LINUX_VERSION_CODE >> 16;
101 version[1] = LINUX_VERSION_CODE >> 8 & 0xf;
102 version[2] = LINUX_VERSION_CODE & 0xf;
103
104 if (json_output) { 97 if (json_output) {
105 jsonw_start_object(json_wtr); 98 jsonw_start_object(json_wtr);
106 jsonw_name(json_wtr, "version"); 99 jsonw_name(json_wtr, "version");
107 jsonw_printf(json_wtr, "\"%u.%u.%u\"", 100 jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION);
108 version[0], version[1], version[2]);
109 jsonw_end_object(json_wtr); 101 jsonw_end_object(json_wtr);
110 } else { 102 } else {
111 printf("%s v%u.%u.%u\n", bin_name, 103 printf("%s v%s\n", bin_name, BPFTOOL_VERSION);
112 version[0], version[1], version[2]);
113 } 104 }
114 return 0; 105 return 0;
115} 106}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 8f6d3cac0347..65b526fe6e7e 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -96,6 +96,8 @@ struct pinned_obj {
96int build_pinned_obj_table(struct pinned_obj_table *table, 96int build_pinned_obj_table(struct pinned_obj_table *table,
97 enum bpf_obj_type type); 97 enum bpf_obj_type type);
98void delete_pinned_obj_table(struct pinned_obj_table *tab); 98void delete_pinned_obj_table(struct pinned_obj_table *tab);
99void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
100void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
99 101
100struct cmd { 102struct cmd {
101 const char *cmd; 103 const char *cmd;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a8c3a33dd185..8d7db9d6b9cd 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -861,7 +861,7 @@ static int do_help(int argc, char **argv)
861 } 861 }
862 862
863 fprintf(stderr, 863 fprintf(stderr,
864 "Usage: %s %s show [MAP]\n" 864 "Usage: %s %s { show | list } [MAP]\n"
865 " %s %s dump MAP\n" 865 " %s %s dump MAP\n"
866 " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n" 866 " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n"
867 " %s %s lookup MAP key BYTES\n" 867 " %s %s lookup MAP key BYTES\n"
@@ -885,6 +885,7 @@ static int do_help(int argc, char **argv)
885 885
886static const struct cmd cmds[] = { 886static const struct cmd cmds[] = {
887 { "show", do_show }, 887 { "show", do_show },
888 { "list", do_show },
888 { "help", do_help }, 889 { "help", do_help },
889 { "dump", do_dump }, 890 { "dump", do_dump },
890 { "update", do_update }, 891 { "update", do_update },
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index fd0873178503..c6a28be4665c 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -230,6 +230,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
230 info->tag[0], info->tag[1], info->tag[2], info->tag[3], 230 info->tag[0], info->tag[1], info->tag[2], info->tag[3],
231 info->tag[4], info->tag[5], info->tag[6], info->tag[7]); 231 info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
232 232
233 print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
234
233 if (info->load_time) { 235 if (info->load_time) {
234 char buf[32]; 236 char buf[32];
235 237
@@ -287,6 +289,7 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
287 289
288 printf("tag "); 290 printf("tag ");
289 fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); 291 fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
292 print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
290 printf("\n"); 293 printf("\n");
291 294
292 if (info->load_time) { 295 if (info->load_time) {
@@ -810,12 +813,12 @@ static int do_load(int argc, char **argv)
810 usage(); 813 usage();
811 814
812 if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) { 815 if (bpf_prog_load(argv[0], BPF_PROG_TYPE_UNSPEC, &obj, &prog_fd)) {
813 p_err("failed to load program\n"); 816 p_err("failed to load program");
814 return -1; 817 return -1;
815 } 818 }
816 819
817 if (do_pin_fd(prog_fd, argv[1])) { 820 if (do_pin_fd(prog_fd, argv[1])) {
818 p_err("failed to pin program\n"); 821 p_err("failed to pin program");
819 return -1; 822 return -1;
820 } 823 }
821 824
@@ -833,7 +836,7 @@ static int do_help(int argc, char **argv)
833 } 836 }
834 837
835 fprintf(stderr, 838 fprintf(stderr,
836 "Usage: %s %s show [PROG]\n" 839 "Usage: %s %s { show | list } [PROG]\n"
837 " %s %s dump xlated PROG [{ file FILE | opcodes }]\n" 840 " %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
838 " %s %s dump jited PROG [{ file FILE | opcodes }]\n" 841 " %s %s dump jited PROG [{ file FILE | opcodes }]\n"
839 " %s %s pin PROG FILE\n" 842 " %s %s pin PROG FILE\n"
@@ -851,6 +854,7 @@ static int do_help(int argc, char **argv)
851 854
852static const struct cmd cmds[] = { 855static const struct cmd cmds[] = {
853 { "show", do_show }, 856 { "show", do_show },
857 { "list", do_show },
854 { "help", do_help }, 858 { "help", do_help },
855 { "dump", do_dump }, 859 { "dump", do_dump },
856 { "pin", do_pin }, 860 { "pin", do_pin },
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 96982640fbf8..17f2c73fff8b 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -13,6 +13,7 @@ FILES= \
13 test-hello.bin \ 13 test-hello.bin \
14 test-libaudit.bin \ 14 test-libaudit.bin \
15 test-libbfd.bin \ 15 test-libbfd.bin \
16 test-disassembler-four-args.bin \
16 test-liberty.bin \ 17 test-liberty.bin \
17 test-liberty-z.bin \ 18 test-liberty-z.bin \
18 test-cplus-demangle.bin \ 19 test-cplus-demangle.bin \
@@ -188,6 +189,9 @@ $(OUTPUT)test-libpython-version.bin:
188$(OUTPUT)test-libbfd.bin: 189$(OUTPUT)test-libbfd.bin:
189 $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl 190 $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
190 191
192$(OUTPUT)test-disassembler-four-args.bin:
193 $(BUILD) -lbfd -lopcodes
194
191$(OUTPUT)test-liberty.bin: 195$(OUTPUT)test-liberty.bin:
192 $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty 196 $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
193 197
diff --git a/tools/build/feature/test-disassembler-four-args.c b/tools/build/feature/test-disassembler-four-args.c
new file mode 100644
index 000000000000..45ce65cfddf0
--- /dev/null
+++ b/tools/build/feature/test-disassembler-four-args.c
@@ -0,0 +1,15 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <bfd.h>
3#include <dis-asm.h>
4
5int main(void)
6{
7 bfd *abfd = bfd_openr(NULL, NULL);
8
9 disassembler(bfd_get_arch(abfd),
10 bfd_big_endian(abfd),
11 bfd_get_mach(abfd),
12 abfd);
13
14 return 0;
15}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index db1b0923a308..4e8c60acfa32 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -921,6 +921,9 @@ struct bpf_prog_info {
921 __u32 nr_map_ids; 921 __u32 nr_map_ids;
922 __aligned_u64 map_ids; 922 __aligned_u64 map_ids;
923 char name[BPF_OBJ_NAME_LEN]; 923 char name[BPF_OBJ_NAME_LEN];
924 __u32 ifindex;
925 __u64 netns_dev;
926 __u64 netns_ino;
924} __attribute__((aligned(8))); 927} __attribute__((aligned(8)));
925 928
926struct bpf_map_info { 929struct bpf_map_info {
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1304753d29ea..a8aa7e251c8e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -19,7 +19,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
19TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 19TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
20 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 20 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
21 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ 21 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
22 test_l4lb_noinline.o test_xdp_noinline.o 22 test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o
23 23
24TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ 24TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
25 test_offload.py 25 test_offload.py
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index c940505c2978..e3c750f17cb8 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -18,6 +18,8 @@ import argparse
18import json 18import json
19import os 19import os
20import pprint 20import pprint
21import random
22import string
21import subprocess 23import subprocess
22import time 24import time
23 25
@@ -27,6 +29,7 @@ bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
27pp = pprint.PrettyPrinter() 29pp = pprint.PrettyPrinter()
28devs = [] # devices we created for clean up 30devs = [] # devices we created for clean up
29files = [] # files to be removed 31files = [] # files to be removed
32netns = [] # net namespaces to be removed
30 33
31def log_get_sec(level=0): 34def log_get_sec(level=0):
32 return "*" * (log_level + level) 35 return "*" * (log_level + level)
@@ -128,22 +131,25 @@ def rm(f):
128 if f in files: 131 if f in files:
129 files.remove(f) 132 files.remove(f)
130 133
131def tool(name, args, flags, JSON=True, fail=True): 134def tool(name, args, flags, JSON=True, ns="", fail=True):
132 params = "" 135 params = ""
133 if JSON: 136 if JSON:
134 params += "%s " % (flags["json"]) 137 params += "%s " % (flags["json"])
135 138
136 ret, out = cmd(name + " " + params + args, fail=fail) 139 if ns != "":
140 ns = "ip netns exec %s " % (ns)
141
142 ret, out = cmd(ns + name + " " + params + args, fail=fail)
137 if JSON and len(out.strip()) != 0: 143 if JSON and len(out.strip()) != 0:
138 return ret, json.loads(out) 144 return ret, json.loads(out)
139 else: 145 else:
140 return ret, out 146 return ret, out
141 147
142def bpftool(args, JSON=True, fail=True): 148def bpftool(args, JSON=True, ns="", fail=True):
143 return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail) 149 return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
144 150
145def bpftool_prog_list(expected=None): 151def bpftool_prog_list(expected=None, ns=""):
146 _, progs = bpftool("prog show", JSON=True, fail=True) 152 _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
147 if expected is not None: 153 if expected is not None:
148 if len(progs) != expected: 154 if len(progs) != expected:
149 fail(True, "%d BPF programs loaded, expected %d" % 155 fail(True, "%d BPF programs loaded, expected %d" %
@@ -158,13 +164,13 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
158 time.sleep(0.05) 164 time.sleep(0.05)
159 raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) 165 raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
160 166
161def ip(args, force=False, JSON=True, fail=True): 167def ip(args, force=False, JSON=True, ns="", fail=True):
162 if force: 168 if force:
163 args = "-force " + args 169 args = "-force " + args
164 return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail) 170 return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail)
165 171
166def tc(args, JSON=True, fail=True): 172def tc(args, JSON=True, ns="", fail=True):
167 return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail) 173 return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
168 174
169def ethtool(dev, opt, args, fail=True): 175def ethtool(dev, opt, args, fail=True):
170 return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) 176 return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
@@ -178,6 +184,15 @@ def bpf_pinned(name):
178def bpf_bytecode(bytecode): 184def bpf_bytecode(bytecode):
179 return "bytecode \"%s\"" % (bytecode) 185 return "bytecode \"%s\"" % (bytecode)
180 186
def mknetns(n_retry=10):
    """
    Create a network namespace with a random 8-letter name.

    Makes up to n_retry attempts, picking a new random name each time.
    On success the name is appended to the global netns list and returned;
    None is returned if every attempt failed.
    """
    attempts = 0
    while attempts < n_retry:
        candidate = ''.join(random.choice(string.ascii_letters)
                            for _ in range(8))
        status, _out = ip("netns add %s" % (candidate), fail=False)
        if status == 0:
            netns.append(candidate)
            return candidate
        attempts += 1
    return None
195
181class DebugfsDir: 196class DebugfsDir:
182 """ 197 """
183 Class for accessing DebugFS directories as a dictionary. 198 Class for accessing DebugFS directories as a dictionary.
@@ -237,6 +252,8 @@ class NetdevSim:
237 self.dev = self._netdevsim_create() 252 self.dev = self._netdevsim_create()
238 devs.append(self) 253 devs.append(self)
239 254
255 self.ns = ""
256
240 self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) 257 self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
241 self.dfs_refresh() 258 self.dfs_refresh()
242 259
@@ -257,7 +274,7 @@ class NetdevSim:
257 274
258 def remove(self): 275 def remove(self):
259 devs.remove(self) 276 devs.remove(self)
260 ip("link del dev %s" % (self.dev["ifname"])) 277 ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
261 278
262 def dfs_refresh(self): 279 def dfs_refresh(self):
263 self.dfs = DebugfsDir(self.dfs_dir) 280 self.dfs = DebugfsDir(self.dfs_dir)
@@ -285,6 +302,11 @@ class NetdevSim:
285 time.sleep(0.05) 302 time.sleep(0.05)
286 raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) 303 raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
287 304
305 def set_ns(self, ns):
306 name = "1" if ns == "" else ns
307 ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
308 self.ns = ns
309
288 def set_mtu(self, mtu, fail=True): 310 def set_mtu(self, mtu, fail=True):
289 return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), 311 return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
290 fail=fail) 312 fail=fail)
@@ -372,6 +394,8 @@ def clean_up():
372 dev.remove() 394 dev.remove()
373 for f in files: 395 for f in files:
374 cmd("rm -f %s" % (f)) 396 cmd("rm -f %s" % (f))
397 for ns in netns:
398 cmd("ip netns delete %s" % (ns))
375 399
376def pin_prog(file_name, idx=0): 400def pin_prog(file_name, idx=0):
377 progs = bpftool_prog_list(expected=(idx + 1)) 401 progs = bpftool_prog_list(expected=(idx + 1))
@@ -381,6 +405,35 @@ def pin_prog(file_name, idx=0):
381 405
382 return file_name, bpf_pinned(file_name) 406 return file_name, bpf_pinned(file_name)
383 407
def check_dev_info(other_ns, ns, pin_file=None, removed=False):
    """
    Verify the device information bpftool reports for an offloaded program.

    other_ns -- True when the device lives in a different namespace than the
                one bpftool is run in, in which case no "ifname" may be
                reported; False when the name must be reported and match
                the global sim device.
    ns       -- namespace to run bpftool in ("" for the current one).
    pin_file -- pinned program path, only used when removed is True.
    removed  -- True when the device has been removed: no programs may be
                listed and showing the pinned program must fail with
                "No such device".
    """
    if removed:
        bpftool_prog_list(expected=0)
        ret, err = bpftool("prog show pin %s" % (pin_file), fail=False)
        fail(ret == 0, "Showing prog with removed device did not fail")
        fail(err["error"].find("No such device") == -1,
             "Showing prog with removed device expected ENODEV, error is %s" %
             (err["error"]))
        return

    # From here on the device is known to exist, so exactly one program is
    # expected and there is no removed-device case left to handle.
    progs = bpftool_prog_list(expected=1, ns=ns)
    prog = progs[0]

    fail("dev" not in prog.keys(), "Device parameters not reported")
    dev = prog["dev"]
    fail("ifindex" not in dev.keys(), "Device parameters not reported")
    fail("ns_dev" not in dev.keys(), "Device parameters not reported")
    fail("ns_inode" not in dev.keys(), "Device parameters not reported")

    if not other_ns:
        fail("ifname" not in dev.keys(), "Ifname not reported")
        fail(dev["ifname"] != sim["ifname"],
             "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
    else:
        fail("ifname" in dev.keys(), "Ifname is reported for other ns")
436
384# Parse command line 437# Parse command line
385parser = argparse.ArgumentParser() 438parser = argparse.ArgumentParser()
386parser.add_argument("--log", help="output verbose log to given file") 439parser.add_argument("--log", help="output verbose log to given file")
@@ -417,6 +470,12 @@ for s in samples:
417 skip(ret != 0, "sample %s/%s not found, please compile it" % 470 skip(ret != 0, "sample %s/%s not found, please compile it" %
418 (bpf_test_dir, s)) 471 (bpf_test_dir, s))
419 472
473# Check if net namespaces seem to work
474ns = mknetns()
475skip(ns is None, "Could not create a net namespace")
476cmd("ip netns delete %s" % (ns))
477netns = []
478
420try: 479try:
421 obj = bpf_obj("sample_ret0.o") 480 obj = bpf_obj("sample_ret0.o")
422 bytecode = bpf_bytecode("1,6 0 0 4294967295,") 481 bytecode = bpf_bytecode("1,6 0 0 4294967295,")
@@ -549,6 +608,8 @@ try:
549 progs = bpftool_prog_list(expected=1) 608 progs = bpftool_prog_list(expected=1)
550 fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], 609 fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
551 "Loaded program has wrong ID") 610 "Loaded program has wrong ID")
611 fail("dev" in progs[0].keys(),
612 "Device parameters reported for non-offloaded program")
552 613
553 start_test("Test XDP prog replace with bad flags...") 614 start_test("Test XDP prog replace with bad flags...")
554 ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False) 615 ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
@@ -673,6 +734,35 @@ try:
673 fail(time_diff < delay_sec, "Removal process took %s, expected %s" % 734 fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
674 (time_diff, delay_sec)) 735 (time_diff, delay_sec))
675 736
737 # Remove all pinned files and reinstantiate the netdev
738 clean_up()
739 bpftool_prog_list_wait(expected=0)
740
741 sim = NetdevSim()
742 sim.set_ethtool_tc_offloads(True)
743 sim.set_xdp(obj, "offload")
744
745 start_test("Test bpftool bound info reporting (own ns)...")
746 check_dev_info(False, "")
747
748 start_test("Test bpftool bound info reporting (other ns)...")
749 ns = mknetns()
750 sim.set_ns(ns)
751 check_dev_info(True, "")
752
753 start_test("Test bpftool bound info reporting (remote ns)...")
754 check_dev_info(False, ns)
755
756 start_test("Test bpftool bound info reporting (back to own ns)...")
757 sim.set_ns("")
758 check_dev_info(False, "")
759
760 pin_file, _ = pin_prog("/sys/fs/bpf/tmp")
761 sim.remove()
762
763 start_test("Test bpftool bound info reporting (removed dev)...")
764 check_dev_info(True, "", pin_file=pin_file, removed=True)
765
676 print("%s: OK" % (os.path.basename(__file__))) 766 print("%s: OK" % (os.path.basename(__file__)))
677 767
678finally: 768finally:
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 09087ab12293..b549308abd19 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -837,6 +837,132 @@ static void test_tp_attach_query(void)
837 free(query); 837 free(query);
838} 838}
839 839
840static int compare_map_keys(int map1_fd, int map2_fd)
841{
842 __u32 key, next_key;
843 char val_buf[PERF_MAX_STACK_DEPTH * sizeof(__u64)];
844 int err;
845
846 err = bpf_map_get_next_key(map1_fd, NULL, &key);
847 if (err)
848 return err;
849 err = bpf_map_lookup_elem(map2_fd, &key, val_buf);
850 if (err)
851 return err;
852
853 while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) {
854 err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf);
855 if (err)
856 return err;
857
858 key = next_key;
859 }
860 if (errno != ENOENT)
861 return -1;
862
863 return 0;
864}
865
866static void test_stacktrace_map()
867{
868 int control_map_fd, stackid_hmap_fd, stackmap_fd;
869 const char *file = "./test_stacktrace_map.o";
870 int bytes, efd, err, pmu_fd, prog_fd;
871 struct perf_event_attr attr = {};
872 __u32 key, val, duration = 0;
873 struct bpf_object *obj;
874 char buf[256];
875
876 err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
877 if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
878 goto out;
879
880 /* Get the ID for the sched/sched_switch tracepoint */
881 snprintf(buf, sizeof(buf),
882 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
883 efd = open(buf, O_RDONLY, 0);
884 if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
885 goto close_prog;
886
887 bytes = read(efd, buf, sizeof(buf));
888 close(efd);
889 if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
890 "read", "bytes %d errno %d\n", bytes, errno))
891 goto close_prog;
892
893 /* Open the perf event and attach bpf progrram */
894 attr.config = strtol(buf, NULL, 0);
895 attr.type = PERF_TYPE_TRACEPOINT;
896 attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
897 attr.sample_period = 1;
898 attr.wakeup_events = 1;
899 pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
900 0 /* cpu 0 */, -1 /* group id */,
901 0 /* flags */);
902 if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
903 pmu_fd, errno))
904 goto close_prog;
905
906 err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
907 if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
908 err, errno))
909 goto close_pmu;
910
911 err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
912 if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
913 err, errno))
914 goto disable_pmu;
915
916 /* find map fds */
917 control_map_fd = bpf_find_map(__func__, obj, "control_map");
918 if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
919 "err %d errno %d\n", err, errno))
920 goto disable_pmu;
921
922 stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
923 if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
924 "err %d errno %d\n", err, errno))
925 goto disable_pmu;
926
927 stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
928 if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
929 err, errno))
930 goto disable_pmu;
931
932 /* give some time for bpf program run */
933 sleep(1);
934
935 /* disable stack trace collection */
936 key = 0;
937 val = 1;
938 bpf_map_update_elem(control_map_fd, &key, &val, 0);
939
940 /* for every element in stackid_hmap, we can find a corresponding one
941 * in stackmap, and vise versa.
942 */
943 err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
944 if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
945 "err %d errno %d\n", err, errno))
946 goto disable_pmu;
947
948 err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
949 if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
950 "err %d errno %d\n", err, errno))
951 ; /* fall through */
952
953disable_pmu:
954 ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
955
956close_pmu:
957 close(pmu_fd);
958
959close_prog:
960 bpf_object__close(obj);
961
962out:
963 return;
964}
965
840int main(void) 966int main(void)
841{ 967{
842 struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY }; 968 struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -852,6 +978,7 @@ int main(void)
852 test_pkt_md_access(); 978 test_pkt_md_access();
853 test_obj_name(); 979 test_obj_name();
854 test_tp_attach_query(); 980 test_tp_attach_query();
981 test_stacktrace_map();
855 982
856 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); 983 printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
857 return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; 984 return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c
new file mode 100644
index 000000000000..76d85c5d08bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
@@ -0,0 +1,62 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2018 Facebook
3
4#include <linux/bpf.h>
5#include "bpf_helpers.h"
6
7#ifndef PERF_MAX_STACK_DEPTH
8#define PERF_MAX_STACK_DEPTH 127
9#endif
10
11struct bpf_map_def SEC("maps") control_map = {
12 .type = BPF_MAP_TYPE_ARRAY,
13 .key_size = sizeof(__u32),
14 .value_size = sizeof(__u32),
15 .max_entries = 1,
16};
17
18struct bpf_map_def SEC("maps") stackid_hmap = {
19 .type = BPF_MAP_TYPE_HASH,
20 .key_size = sizeof(__u32),
21 .value_size = sizeof(__u32),
22 .max_entries = 10000,
23};
24
25struct bpf_map_def SEC("maps") stackmap = {
26 .type = BPF_MAP_TYPE_STACK_TRACE,
27 .key_size = sizeof(__u32),
28 .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
29 .max_entries = 10000,
30};
31
32/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
33struct sched_switch_args {
34 unsigned long long pad;
35 char prev_comm[16];
36 int prev_pid;
37 int prev_prio;
38 long long prev_state;
39 char next_comm[16];
40 int next_pid;
41 int next_prio;
42};
43
44SEC("tracepoint/sched/sched_switch")
45int oncpu(struct sched_switch_args *ctx)
46{
47 __u32 key = 0, val = 0, *value_p;
48
49 value_p = bpf_map_lookup_elem(&control_map, &key);
50 if (value_p && *value_p)
51 return 0; /* skip if non-zero *value_p */
52
53 /* The size of stackmap and stackid_hmap should be the same */
54 key = bpf_get_stackid(ctx, &stackmap, 0);
55 if ((int)key >= 0)
56 bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
57
58 return 0;
59}
60
61char _license[] SEC("license") = "GPL";
62__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */