author    Daniel Borkmann <daniel@iogearbox.net>  2018-05-04 17:41:05 -0400
committer Daniel Borkmann <daniel@iogearbox.net>  2018-05-04 17:41:06 -0400
commit    a5458aa923be8960a78d6fdfa1c6ff769b34deb2 (patch)
tree      1e14a76c6a39662d602bf9589a31245ca9d74cdd
parent    c27638c0628a5507e421f325dae3d3c9a45f227e (diff)
parent    ab7f5bf0928be2f148d000a6eaa6c0a36e74750e (diff)
Merge branch 'bpf-event-output-offload'
Jakub Kicinski says:

====================
This series centres on NFP offload of bpf_event_output(). The first patch
allows perf event arrays to be used by offloaded programs. The next patch
makes the nfp driver keep track of such arrays so it can filter FW events
referring to maps. Perf event arrays are not device bound; having the
driver reimplement and manage the perf array seems brittle and unnecessary.

Patch 4 slightly moves the verifier step which replaces map fds with map
pointers. This is useful for the nfp JIT since we can then easily replace
host pointers with NFP table ids (patch 6). This allows us to lift the
limitation that map helpers had to be used with the same map pointer on
all paths. A second use of replacing fds with real host map pointers is
that we can use the host map pointer as the key for FW events in perf
event array offload.

Patch 5 adds perf event output offload support for the NFP. There are some
differences between the offloaded and non-offloaded versions of
bpf_event_output(): the FW messages which carry events may get dropped and
reordered relatively easily, and the return codes from the helper are not
guaranteed to match the host. Users are warned about some of those
discrepancies with a one-time warning message in the kernel logs.

bpftool gains the ability to dump perf ring events in a very simple
format. This was very useful for testing and simple debugging; maybe it
will be useful to others?

The last patch is a trivial comment fix.
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
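
For orientation, below is a minimal sketch of the kind of program this series makes offloadable: an XDP program emitting events into a BPF_MAP_TYPE_PERF_EVENT_ARRAY via bpf_perf_event_output() with BPF_F_CURRENT_CPU, which is the only flag value the NFP verifier code in this series accepts. The map definition style and helper declarations assume the samples/bpf conventions of this era; the names are illustrative and not taken from the patches.

#include <linux/bpf.h>
#include "bpf_helpers.h"	/* assumed: samples/bpf-style helper declarations */

struct bpf_map_def SEC("maps") my_events = {
	.type		= BPF_MAP_TYPE_PERF_EVENT_ARRAY,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 64,	/* at least the number of possible CPUs */
};

SEC("xdp")
int xdp_report(struct xdp_md *ctx)
{
	__u64 cookie = 0x1234;

	/* Offloaded programs must pass BPF_F_CURRENT_CPU as the flags;
	 * the NFP verifier changes below reject any other index.
	 */
	bpf_perf_event_output(ctx, &my_events, BPF_F_CURRENT_CPU,
			      &cookie, sizeof(cookie));
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

On the host, each populated slot of the array is backed by a perf ring; with offload, the FW forwards the event over the control message channel and the driver replays it into the same rings via bpf_event_output().
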
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/cmsg.c      |  16
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/fw.h        |  20
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/jit.c       |  76
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.c      |  28
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/main.h      |  24
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/offload.c   | 172
-rw-r--r--  drivers/net/ethernet/netronome/nfp/bpf/verifier.c  |  78
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfp_app.c       |   2
-rw-r--r--  include/linux/bpf.h                                |   5
-rw-r--r--  kernel/bpf/core.c                                  |   1
-rw-r--r--  kernel/bpf/offload.c                               |   6
-rw-r--r--  kernel/bpf/syscall.c                               |   2
-rw-r--r--  kernel/bpf/verifier.c                              |  14
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool-map.rst    |  40
-rw-r--r--  tools/bpf/bpftool/Documentation/bpftool.rst        |   2
-rw-r--r--  tools/bpf/bpftool/Makefile                         |   7
-rw-r--r--  tools/bpf/bpftool/bash-completion/bpftool          |  36
-rw-r--r--  tools/bpf/bpftool/common.c                         |  77
-rw-r--r--  tools/bpf/bpftool/main.h                           |   7
-rw-r--r--  tools/bpf/bpftool/map.c                            |  80
-rw-r--r--  tools/bpf/bpftool/map_perf_ring.c                  | 347
21 files changed, 916 insertions, 124 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
index 7e298148ca26..cb87fccb9f6a 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -102,6 +102,15 @@ nfp_bpf_cmsg_map_req_alloc(struct nfp_app_bpf *bpf, unsigned int n)
102 return nfp_bpf_cmsg_alloc(bpf, size); 102 return nfp_bpf_cmsg_alloc(bpf, size);
103} 103}
104 104
105static u8 nfp_bpf_cmsg_get_type(struct sk_buff *skb)
106{
107 struct cmsg_hdr *hdr;
108
109 hdr = (struct cmsg_hdr *)skb->data;
110
111 return hdr->type;
112}
113
105static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb) 114static unsigned int nfp_bpf_cmsg_get_tag(struct sk_buff *skb)
106{ 115{
107 struct cmsg_hdr *hdr; 116 struct cmsg_hdr *hdr;
@@ -431,6 +440,11 @@ void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb)
431 goto err_free; 440 goto err_free;
432 } 441 }
433 442
443 if (nfp_bpf_cmsg_get_type(skb) == CMSG_TYPE_BPF_EVENT) {
444 nfp_bpf_event_output(bpf, skb);
445 return;
446 }
447
434 nfp_ctrl_lock(bpf->app->ctrl); 448 nfp_ctrl_lock(bpf->app->ctrl);
435 449
436 tag = nfp_bpf_cmsg_get_tag(skb); 450 tag = nfp_bpf_cmsg_get_tag(skb);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/fw.h b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
index 39639ac28b01..3dbc21653ce5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/fw.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -37,6 +37,14 @@
37#include <linux/bitops.h> 37#include <linux/bitops.h>
38#include <linux/types.h> 38#include <linux/types.h>
39 39
40/* Kernel's enum bpf_reg_type is not uABI so people may change it breaking
41 * our FW ABI. In that case we will do translation in the driver.
42 */
43#define NFP_BPF_SCALAR_VALUE 1
44#define NFP_BPF_MAP_VALUE 4
45#define NFP_BPF_STACK 6
46#define NFP_BPF_PACKET_DATA 8
47
40enum bpf_cap_tlv_type { 48enum bpf_cap_tlv_type {
41 NFP_BPF_CAP_TYPE_FUNC = 1, 49 NFP_BPF_CAP_TYPE_FUNC = 1,
42 NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2, 50 NFP_BPF_CAP_TYPE_ADJUST_HEAD = 2,
@@ -81,6 +89,7 @@ enum nfp_bpf_cmsg_type {
81 CMSG_TYPE_MAP_DELETE = 5, 89 CMSG_TYPE_MAP_DELETE = 5,
82 CMSG_TYPE_MAP_GETNEXT = 6, 90 CMSG_TYPE_MAP_GETNEXT = 6,
83 CMSG_TYPE_MAP_GETFIRST = 7, 91 CMSG_TYPE_MAP_GETFIRST = 7,
92 CMSG_TYPE_BPF_EVENT = 8,
84 __CMSG_TYPE_MAP_MAX, 93 __CMSG_TYPE_MAP_MAX,
85}; 94};
86 95
@@ -155,4 +164,13 @@ struct cmsg_reply_map_op {
155 __be32 resv; 164 __be32 resv;
156 struct cmsg_key_value_pair elem[0]; 165 struct cmsg_key_value_pair elem[0];
157}; 166};
167
168struct cmsg_bpf_event {
169 struct cmsg_hdr hdr;
170 __be32 cpu_id;
171 __be64 map_ptr;
172 __be32 data_size;
173 __be32 pkt_size;
174 u8 data[0];
175};
158#endif 176#endif
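
As nfp_bpf_event_output() in offload.c further down consumes it, the data[] payload of struct cmsg_bpf_event carries the packet snapshot first (pkt_size bytes) followed by the helper's metadata (data_size bytes) starting at the next 4-byte boundary. A purely illustrative sketch of that split, not driver code:

/* Split a received cmsg_bpf_event payload the way nfp_bpf_event_output()
 * does (illustrative only; length and version checks omitted).
 */
static void split_bpf_event(const struct cmsg_bpf_event *cbe)
{
	u32 pkt_size  = be32_to_cpu(cbe->pkt_size);
	u32 data_size = be32_to_cpu(cbe->data_size);
	const u8 *pkt  = cbe->data;				/* packet bytes first */
	const u8 *meta = &cbe->data[round_up(pkt_size, 4)];	/* then helper data */

	/* pkt[0..pkt_size) and meta[0..data_size) become the ctx and meta
	 * arguments of bpf_event_output() on the host side.
	 */
	(void)pkt;
	(void)meta;
	(void)data_size;
}
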
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 65f0791cae0c..326a2085d650 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2016-2017 Netronome Systems, Inc. 2 * Copyright (C) 2016-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -1395,15 +1395,9 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1395static int 1395static int
1396map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1396map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1397{ 1397{
1398 struct bpf_offloaded_map *offmap;
1399 struct nfp_bpf_map *nfp_map;
1400 bool load_lm_ptr; 1398 bool load_lm_ptr;
1401 u32 ret_tgt; 1399 u32 ret_tgt;
1402 s64 lm_off; 1400 s64 lm_off;
1403 swreg tid;
1404
1405 offmap = (struct bpf_offloaded_map *)meta->arg1.map_ptr;
1406 nfp_map = offmap->dev_priv;
1407 1401
1408 /* We only have to reload LM0 if the key is not at start of stack */ 1402 /* We only have to reload LM0 if the key is not at start of stack */
1409 lm_off = nfp_prog->stack_depth; 1403 lm_off = nfp_prog->stack_depth;
@@ -1416,17 +1410,12 @@ map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1416 if (meta->func_id == BPF_FUNC_map_update_elem) 1410 if (meta->func_id == BPF_FUNC_map_update_elem)
1417 emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); 1411 emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
1418 1412
1419 /* Load map ID into a register, it should actually fit as an immediate
1420 * but in case it doesn't deal with it here, not in the delay slots.
1421 */
1422 tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog));
1423
1424 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, 1413 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1425 2, RELO_BR_HELPER); 1414 2, RELO_BR_HELPER);
1426 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; 1415 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
1427 1416
1428 /* Load map ID into A0 */ 1417 /* Load map ID into A0 */
1429 wrp_mov(nfp_prog, reg_a(0), tid); 1418 wrp_mov(nfp_prog, reg_a(0), reg_a(2));
1430 1419
1431 /* Load the return address into B0 */ 1420 /* Load the return address into B0 */
1432 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); 1421 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
@@ -1456,6 +1445,31 @@ nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1456 return 0; 1445 return 0;
1457} 1446}
1458 1447
1448static int
1449nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1450{
1451 swreg ptr_type;
1452 u32 ret_tgt;
1453
1454 ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
1455
1456 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
1457
1458 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1459 2, RELO_BR_HELPER);
1460
1461 /* Load ptr type into A1 */
1462 wrp_mov(nfp_prog, reg_a(1), ptr_type);
1463
1464 /* Load the return address into B0 */
1465 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1466
1467 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1468 return -EINVAL;
1469
1470 return 0;
1471}
1472
1459/* --- Callbacks --- */ 1473/* --- Callbacks --- */
1460static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) 1474static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1461{ 1475{
@@ -2411,6 +2425,8 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2411 return map_call_stack_common(nfp_prog, meta); 2425 return map_call_stack_common(nfp_prog, meta);
2412 case BPF_FUNC_get_prandom_u32: 2426 case BPF_FUNC_get_prandom_u32:
2413 return nfp_get_prandom_u32(nfp_prog, meta); 2427 return nfp_get_prandom_u32(nfp_prog, meta);
2428 case BPF_FUNC_perf_event_output:
2429 return nfp_perf_event_output(nfp_prog, meta);
2414 default: 2430 default:
2415 WARN_ONCE(1, "verifier allowed unsupported function\n"); 2431 WARN_ONCE(1, "verifier allowed unsupported function\n");
2416 return -EOPNOTSUPP; 2432 return -EOPNOTSUPP;
@@ -3227,6 +3243,33 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
3227 return 0; 3243 return 0;
3228} 3244}
3229 3245
3246static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
3247{
3248 struct nfp_insn_meta *meta1, *meta2;
3249 struct nfp_bpf_map *nfp_map;
3250 struct bpf_map *map;
3251
3252 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3253 if (meta1->skip || meta2->skip)
3254 continue;
3255
3256 if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
3257 meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
3258 continue;
3259
3260 map = (void *)(unsigned long)((u32)meta1->insn.imm |
3261 (u64)meta2->insn.imm << 32);
3262 if (bpf_map_offload_neutral(map))
3263 continue;
3264 nfp_map = map_to_offmap(map)->dev_priv;
3265
3266 meta1->insn.imm = nfp_map->tid;
3267 meta2->insn.imm = 0;
3268 }
3269
3270 return 0;
3271}
3272
3230static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) 3273static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
3231{ 3274{
3232 __le64 *ustore = (__force __le64 *)prog; 3275 __le64 *ustore = (__force __le64 *)prog;
@@ -3263,6 +3306,10 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog)
3263{ 3306{
3264 int ret; 3307 int ret;
3265 3308
3309 ret = nfp_bpf_replace_map_ptrs(nfp_prog);
3310 if (ret)
3311 return ret;
3312
3266 ret = nfp_bpf_optimize(nfp_prog); 3313 ret = nfp_bpf_optimize(nfp_prog);
3267 if (ret) 3314 if (ret)
3268 return ret; 3315 return ret;
@@ -3353,6 +3400,9 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
3353 case BPF_FUNC_map_delete_elem: 3400 case BPF_FUNC_map_delete_elem:
3354 val = nfp_prog->bpf->helpers.map_delete; 3401 val = nfp_prog->bpf->helpers.map_delete;
3355 break; 3402 break;
3403 case BPF_FUNC_perf_event_output:
3404 val = nfp_prog->bpf->helpers.perf_event_output;
3405 break;
3356 default: 3406 default:
3357 pr_err("relocation of unknown helper %d\n", 3407 pr_err("relocation of unknown helper %d\n",
3358 val); 3408 val);
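
The nfp_bpf_replace_map_ptrs() walker added above matches map references loaded as the two-instruction BPF_LD | BPF_IMM | BPF_DW pair, whose two imm fields hold the low and high 32 bits of the host map pointer once the verifier has swapped the fd for the pointer. A sketch of the pair, with illustrative values only:

/* ld_imm64 pair as matched by nfp_bpf_replace_map_ptrs() (illustrative).
 * Before the rewrite the imm fields carry the host struct bpf_map pointer;
 * afterwards the first imm holds the FW table id (tid) and the second is 0.
 */
struct bpf_insn ld_map_pair[2] = {
	{
		.code    = BPF_LD | BPF_IMM | BPF_DW,
		.dst_reg = BPF_REG_1,
		.src_reg = BPF_PSEUDO_MAP_FD,
		.imm     = 0x12345678,	/* low 32 bits of the map pointer */
	},
	{
		.code    = 0,		/* second half of the ld_imm64 encoding */
		.imm     = 0x00000001,	/* high 32 bits of the map pointer */
	},
};

Offload-neutral maps (the perf event arrays) are deliberately left untouched by the rewrite, since their host pointer is exactly what keys the FW events back to the right map.
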
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 1dc424685f4e..d72f9e7f42da 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -43,6 +43,14 @@
43#include "fw.h" 43#include "fw.h"
44#include "main.h" 44#include "main.h"
45 45
46const struct rhashtable_params nfp_bpf_maps_neutral_params = {
47 .nelem_hint = 4,
48 .key_len = FIELD_SIZEOF(struct nfp_bpf_neutral_map, ptr),
49 .key_offset = offsetof(struct nfp_bpf_neutral_map, ptr),
50 .head_offset = offsetof(struct nfp_bpf_neutral_map, l),
51 .automatic_shrinking = true,
52};
53
46static bool nfp_net_ebpf_capable(struct nfp_net *nn) 54static bool nfp_net_ebpf_capable(struct nfp_net *nn)
47{ 55{
48#ifdef __LITTLE_ENDIAN 56#ifdef __LITTLE_ENDIAN
@@ -290,6 +298,9 @@ nfp_bpf_parse_cap_func(struct nfp_app_bpf *bpf, void __iomem *value, u32 length)
290 case BPF_FUNC_map_delete_elem: 298 case BPF_FUNC_map_delete_elem:
291 bpf->helpers.map_delete = readl(&cap->func_addr); 299 bpf->helpers.map_delete = readl(&cap->func_addr);
292 break; 300 break;
301 case BPF_FUNC_perf_event_output:
302 bpf->helpers.perf_event_output = readl(&cap->func_addr);
303 break;
293 } 304 }
294 305
295 return 0; 306 return 0;
@@ -401,17 +412,28 @@ static int nfp_bpf_init(struct nfp_app *app)
401 init_waitqueue_head(&bpf->cmsg_wq); 412 init_waitqueue_head(&bpf->cmsg_wq);
402 INIT_LIST_HEAD(&bpf->map_list); 413 INIT_LIST_HEAD(&bpf->map_list);
403 414
404 err = nfp_bpf_parse_capabilities(app); 415 err = rhashtable_init(&bpf->maps_neutral, &nfp_bpf_maps_neutral_params);
405 if (err) 416 if (err)
406 goto err_free_bpf; 417 goto err_free_bpf;
407 418
419 err = nfp_bpf_parse_capabilities(app);
420 if (err)
421 goto err_free_neutral_maps;
422
408 return 0; 423 return 0;
409 424
425err_free_neutral_maps:
426 rhashtable_destroy(&bpf->maps_neutral);
410err_free_bpf: 427err_free_bpf:
411 kfree(bpf); 428 kfree(bpf);
412 return err; 429 return err;
413} 430}
414 431
432static void nfp_check_rhashtable_empty(void *ptr, void *arg)
433{
434 WARN_ON_ONCE(1);
435}
436
415static void nfp_bpf_clean(struct nfp_app *app) 437static void nfp_bpf_clean(struct nfp_app *app)
416{ 438{
417 struct nfp_app_bpf *bpf = app->priv; 439 struct nfp_app_bpf *bpf = app->priv;
@@ -419,6 +441,8 @@ static void nfp_bpf_clean(struct nfp_app *app)
419 WARN_ON(!skb_queue_empty(&bpf->cmsg_replies)); 441 WARN_ON(!skb_queue_empty(&bpf->cmsg_replies));
420 WARN_ON(!list_empty(&bpf->map_list)); 442 WARN_ON(!list_empty(&bpf->map_list));
421 WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use); 443 WARN_ON(bpf->maps_in_use || bpf->map_elems_in_use);
444 rhashtable_free_and_destroy(&bpf->maps_neutral,
445 nfp_check_rhashtable_empty, NULL);
422 kfree(bpf); 446 kfree(bpf);
423} 447}
424 448
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 68b5d326483d..82682378d57f 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2016-2017 Netronome Systems, Inc. 2 * Copyright (C) 2016-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -39,6 +39,7 @@
39#include <linux/bpf_verifier.h> 39#include <linux/bpf_verifier.h>
40#include <linux/kernel.h> 40#include <linux/kernel.h>
41#include <linux/list.h> 41#include <linux/list.h>
42#include <linux/rhashtable.h>
42#include <linux/skbuff.h> 43#include <linux/skbuff.h>
43#include <linux/types.h> 44#include <linux/types.h>
44#include <linux/wait.h> 45#include <linux/wait.h>
@@ -114,6 +115,8 @@ enum pkt_vec {
114 * @maps_in_use: number of currently offloaded maps 115 * @maps_in_use: number of currently offloaded maps
115 * @map_elems_in_use: number of elements allocated to offloaded maps 116 * @map_elems_in_use: number of elements allocated to offloaded maps
116 * 117 *
118 * @maps_neutral: hash table of offload-neutral maps (on pointer)
119 *
117 * @adjust_head: adjust head capability 120 * @adjust_head: adjust head capability
118 * @adjust_head.flags: extra flags for adjust head 121 * @adjust_head.flags: extra flags for adjust head
119 * @adjust_head.off_min: minimal packet offset within buffer required 122 * @adjust_head.off_min: minimal packet offset within buffer required
@@ -133,6 +136,7 @@ enum pkt_vec {
133 * @helpers.map_lookup: map lookup helper address 136 * @helpers.map_lookup: map lookup helper address
134 * @helpers.map_update: map update helper address 137 * @helpers.map_update: map update helper address
135 * @helpers.map_delete: map delete helper address 138 * @helpers.map_delete: map delete helper address
139 * @helpers.perf_event_output: output perf event to a ring buffer
136 * 140 *
137 * @pseudo_random: FW initialized the pseudo-random machinery (CSRs) 141 * @pseudo_random: FW initialized the pseudo-random machinery (CSRs)
138 */ 142 */
@@ -150,6 +154,8 @@ struct nfp_app_bpf {
150 unsigned int maps_in_use; 154 unsigned int maps_in_use;
151 unsigned int map_elems_in_use; 155 unsigned int map_elems_in_use;
152 156
157 struct rhashtable maps_neutral;
158
153 struct nfp_bpf_cap_adjust_head { 159 struct nfp_bpf_cap_adjust_head {
154 u32 flags; 160 u32 flags;
155 int off_min; 161 int off_min;
@@ -171,6 +177,7 @@ struct nfp_app_bpf {
171 u32 map_lookup; 177 u32 map_lookup;
172 u32 map_update; 178 u32 map_update;
173 u32 map_delete; 179 u32 map_delete;
180 u32 perf_event_output;
174 } helpers; 181 } helpers;
175 182
176 bool pseudo_random; 183 bool pseudo_random;
@@ -199,6 +206,14 @@ struct nfp_bpf_map {
199 enum nfp_bpf_map_use use_map[]; 206 enum nfp_bpf_map_use use_map[];
200}; 207};
201 208
209struct nfp_bpf_neutral_map {
210 struct rhash_head l;
211 struct bpf_map *ptr;
212 u32 count;
213};
214
215extern const struct rhashtable_params nfp_bpf_maps_neutral_params;
216
202struct nfp_prog; 217struct nfp_prog;
203struct nfp_insn_meta; 218struct nfp_insn_meta;
204typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); 219typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
@@ -367,6 +382,8 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
367 * @error: error code if something went wrong 382 * @error: error code if something went wrong
368 * @stack_depth: max stack depth from the verifier 383 * @stack_depth: max stack depth from the verifier
369 * @adjust_head_location: if program has single adjust head call - the insn no. 384 * @adjust_head_location: if program has single adjust head call - the insn no.
385 * @map_records_cnt: the number of map pointers recorded for this prog
386 * @map_records: the map record pointers from bpf->maps_neutral
370 * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) 387 * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
371 */ 388 */
372struct nfp_prog { 389struct nfp_prog {
@@ -390,6 +407,9 @@ struct nfp_prog {
390 unsigned int stack_depth; 407 unsigned int stack_depth;
391 unsigned int adjust_head_location; 408 unsigned int adjust_head_location;
392 409
410 unsigned int map_records_cnt;
411 struct nfp_bpf_neutral_map **map_records;
412
393 struct list_head insns; 413 struct list_head insns;
394}; 414};
395 415
@@ -440,5 +460,7 @@ int nfp_bpf_ctrl_lookup_entry(struct bpf_offloaded_map *offmap,
440int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap, 460int nfp_bpf_ctrl_getnext_entry(struct bpf_offloaded_map *offmap,
441 void *key, void *next_key); 461 void *key, void *next_key);
442 462
463int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb);
464
443void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb); 465void nfp_bpf_ctrl_msg_rx(struct nfp_app *app, struct sk_buff *skb);
444#endif 466#endif
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 42d98792bd25..4db0ac1e42a8 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2016-2017 Netronome Systems, Inc. 2 * Copyright (C) 2016-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -57,6 +57,126 @@
57#include "../nfp_net.h" 57#include "../nfp_net.h"
58 58
59static int 59static int
60nfp_map_ptr_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
61 struct bpf_map *map)
62{
63 struct nfp_bpf_neutral_map *record;
64 int err;
65
66 /* Map record paths are entered via ndo, update side is protected. */
67 ASSERT_RTNL();
68
69 /* Reuse path - other offloaded program is already tracking this map. */
70 record = rhashtable_lookup_fast(&bpf->maps_neutral, &map,
71 nfp_bpf_maps_neutral_params);
72 if (record) {
73 nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
74 record->count++;
75 return 0;
76 }
77
78 /* Grab a single ref to the map for our record. The prog destroy ndo
79 * happens after free_used_maps().
80 */
81 map = bpf_map_inc(map, false);
82 if (IS_ERR(map))
83 return PTR_ERR(map);
84
85 record = kmalloc(sizeof(*record), GFP_KERNEL);
86 if (!record) {
87 err = -ENOMEM;
88 goto err_map_put;
89 }
90
91 record->ptr = map;
92 record->count = 1;
93
94 err = rhashtable_insert_fast(&bpf->maps_neutral, &record->l,
95 nfp_bpf_maps_neutral_params);
96 if (err)
97 goto err_free_rec;
98
99 nfp_prog->map_records[nfp_prog->map_records_cnt++] = record;
100
101 return 0;
102
103err_free_rec:
104 kfree(record);
105err_map_put:
106 bpf_map_put(map);
107 return err;
108}
109
110static void
111nfp_map_ptrs_forget(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog)
112{
113 bool freed = false;
114 int i;
115
116 ASSERT_RTNL();
117
118 for (i = 0; i < nfp_prog->map_records_cnt; i++) {
119 if (--nfp_prog->map_records[i]->count) {
120 nfp_prog->map_records[i] = NULL;
121 continue;
122 }
123
124 WARN_ON(rhashtable_remove_fast(&bpf->maps_neutral,
125 &nfp_prog->map_records[i]->l,
126 nfp_bpf_maps_neutral_params));
127 freed = true;
128 }
129
130 if (freed) {
131 synchronize_rcu();
132
133 for (i = 0; i < nfp_prog->map_records_cnt; i++)
134 if (nfp_prog->map_records[i]) {
135 bpf_map_put(nfp_prog->map_records[i]->ptr);
136 kfree(nfp_prog->map_records[i]);
137 }
138 }
139
140 kfree(nfp_prog->map_records);
141 nfp_prog->map_records = NULL;
142 nfp_prog->map_records_cnt = 0;
143}
144
145static int
146nfp_map_ptrs_record(struct nfp_app_bpf *bpf, struct nfp_prog *nfp_prog,
147 struct bpf_prog *prog)
148{
149 int i, cnt, err;
150
151 /* Quickly count the maps we will have to remember */
152 cnt = 0;
153 for (i = 0; i < prog->aux->used_map_cnt; i++)
154 if (bpf_map_offload_neutral(prog->aux->used_maps[i]))
155 cnt++;
156 if (!cnt)
157 return 0;
158
159 nfp_prog->map_records = kmalloc_array(cnt,
160 sizeof(nfp_prog->map_records[0]),
161 GFP_KERNEL);
162 if (!nfp_prog->map_records)
163 return -ENOMEM;
164
165 for (i = 0; i < prog->aux->used_map_cnt; i++)
166 if (bpf_map_offload_neutral(prog->aux->used_maps[i])) {
167 err = nfp_map_ptr_record(bpf, nfp_prog,
168 prog->aux->used_maps[i]);
169 if (err) {
170 nfp_map_ptrs_forget(bpf, nfp_prog);
171 return err;
172 }
173 }
174 WARN_ON(cnt != nfp_prog->map_records_cnt);
175
176 return 0;
177}
178
179static int
60nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, 180nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
61 unsigned int cnt) 181 unsigned int cnt)
62{ 182{
@@ -151,7 +271,7 @@ static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog)
151 prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64); 271 prog->aux->offload->jited_len = nfp_prog->prog_len * sizeof(u64);
152 prog->aux->offload->jited_image = nfp_prog->prog; 272 prog->aux->offload->jited_image = nfp_prog->prog;
153 273
154 return 0; 274 return nfp_map_ptrs_record(nfp_prog->bpf, nfp_prog, prog);
155} 275}
156 276
157static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) 277static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
@@ -159,6 +279,7 @@ static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog)
159 struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; 279 struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
160 280
161 kvfree(nfp_prog->prog); 281 kvfree(nfp_prog->prog);
282 nfp_map_ptrs_forget(nfp_prog->bpf, nfp_prog);
162 nfp_prog_free(nfp_prog); 283 nfp_prog_free(nfp_prog);
163 284
164 return 0; 285 return 0;
@@ -320,6 +441,53 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
320 } 441 }
321} 442}
322 443
444static unsigned long
445nfp_bpf_perf_event_copy(void *dst, const void *src,
446 unsigned long off, unsigned long len)
447{
448 memcpy(dst, src + off, len);
449 return 0;
450}
451
452int nfp_bpf_event_output(struct nfp_app_bpf *bpf, struct sk_buff *skb)
453{
454 struct cmsg_bpf_event *cbe = (void *)skb->data;
455 u32 pkt_size, data_size;
456 struct bpf_map *map;
457
458 if (skb->len < sizeof(struct cmsg_bpf_event))
459 goto err_drop;
460
461 pkt_size = be32_to_cpu(cbe->pkt_size);
462 data_size = be32_to_cpu(cbe->data_size);
463 map = (void *)(unsigned long)be64_to_cpu(cbe->map_ptr);
464
465 if (skb->len < sizeof(struct cmsg_bpf_event) + pkt_size + data_size)
466 goto err_drop;
467 if (cbe->hdr.ver != CMSG_MAP_ABI_VERSION)
468 goto err_drop;
469
470 rcu_read_lock();
471 if (!rhashtable_lookup_fast(&bpf->maps_neutral, &map,
472 nfp_bpf_maps_neutral_params)) {
473 rcu_read_unlock();
474 pr_warn("perf event: dest map pointer %px not recognized, dropping event\n",
475 map);
476 goto err_drop;
477 }
478
479 bpf_event_output(map, be32_to_cpu(cbe->cpu_id),
480 &cbe->data[round_up(pkt_size, 4)], data_size,
481 cbe->data, pkt_size, nfp_bpf_perf_event_copy);
482 rcu_read_unlock();
483
484 dev_consume_skb_any(skb);
485 return 0;
486err_drop:
487 dev_kfree_skb_any(skb);
488 return -EINVAL;
489}
490
323static int 491static int
324nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, 492nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
325 struct netlink_ext_ack *extack) 493 struct netlink_ext_ack *extack)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 06ad53ce4ad9..e163f3cfa47d 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2016-2017 Netronome Systems, Inc. 2 * Copyright (C) 2016-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -36,6 +36,8 @@
36#include <linux/kernel.h> 36#include <linux/kernel.h>
37#include <linux/pkt_cls.h> 37#include <linux/pkt_cls.h>
38 38
39#include "../nfp_app.h"
40#include "../nfp_main.h"
39#include "fw.h" 41#include "fw.h"
40#include "main.h" 42#include "main.h"
41 43
@@ -149,15 +151,6 @@ nfp_bpf_map_call_ok(const char *fname, struct bpf_verifier_env *env,
149 return false; 151 return false;
150 } 152 }
151 153
152 /* Rest of the checks is only if we re-parse the same insn */
153 if (!meta->func_id)
154 return true;
155
156 if (meta->arg1.map_ptr != reg1->map_ptr) {
157 pr_vlog(env, "%s: called for different map\n", fname);
158 return false;
159 }
160
161 return true; 154 return true;
162} 155}
163 156
@@ -216,6 +209,71 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env,
216 pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n"); 209 pr_vlog(env, "bpf_get_prandom_u32(): FW doesn't support random number generation\n");
217 return -EOPNOTSUPP; 210 return -EOPNOTSUPP;
218 211
212 case BPF_FUNC_perf_event_output:
213 BUILD_BUG_ON(NFP_BPF_SCALAR_VALUE != SCALAR_VALUE ||
214 NFP_BPF_MAP_VALUE != PTR_TO_MAP_VALUE ||
215 NFP_BPF_STACK != PTR_TO_STACK ||
216 NFP_BPF_PACKET_DATA != PTR_TO_PACKET);
217
218 if (!bpf->helpers.perf_event_output) {
219 pr_vlog(env, "event_output: not supported by FW\n");
220 return -EOPNOTSUPP;
221 }
222
223 /* Force current CPU to make sure we can report the event
224 * wherever we get the control message from FW.
225 */
226 if (reg3->var_off.mask & BPF_F_INDEX_MASK ||
227 (reg3->var_off.value & BPF_F_INDEX_MASK) !=
228 BPF_F_CURRENT_CPU) {
229 char tn_buf[48];
230
231 tnum_strn(tn_buf, sizeof(tn_buf), reg3->var_off);
232 pr_vlog(env, "event_output: must use BPF_F_CURRENT_CPU, var_off: %s\n",
233 tn_buf);
234 return -EOPNOTSUPP;
235 }
236
237 /* Save space in meta, we don't care about arguments other
238 * than 4th meta, shove it into arg1.
239 */
240 reg1 = cur_regs(env) + BPF_REG_4;
241
242 if (reg1->type != SCALAR_VALUE /* NULL ptr */ &&
243 reg1->type != PTR_TO_STACK &&
244 reg1->type != PTR_TO_MAP_VALUE &&
245 reg1->type != PTR_TO_PACKET) {
246 pr_vlog(env, "event_output: unsupported ptr type: %d\n",
247 reg1->type);
248 return -EOPNOTSUPP;
249 }
250
251 if (reg1->type == PTR_TO_STACK &&
252 !nfp_bpf_stack_arg_ok("event_output", env, reg1, NULL))
253 return -EOPNOTSUPP;
254
255 /* Warn user that on offload NFP may return success even if map
256 * is not going to accept the event, since the event output is
257 * fully async and device won't know the state of the map.
258 * There is also FW limitation on the event length.
259 *
260 * Lost events will not show up on the perf ring, driver
261 * won't see them at all. Events may also get reordered.
262 */
263 dev_warn_once(&nfp_prog->bpf->app->pf->pdev->dev,
264 "bpf: note: return codes and behavior of bpf_event_output() helper differs for offloaded programs!\n");
265 pr_vlog(env, "warning: return codes and behavior of event_output helper differ for offload!\n");
266
267 if (!meta->func_id)
268 break;
269
270 if (reg1->type != meta->arg1.type) {
271 pr_vlog(env, "event_output: ptr type changed: %d %d\n",
272 meta->arg1.type, reg1->type);
273 return -EINVAL;
274 }
275 break;
276
219 default: 277 default:
220 pr_vlog(env, "unsupported function id: %d\n", func_id); 278 pr_vlog(env, "unsupported function id: %d\n", func_id);
221 return -EOPNOTSUPP; 279 return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 6aedef0ad433..0e0253c7e17b 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0e00a13ff01b..321969da67b7 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -110,6 +110,11 @@ static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
110 return container_of(map, struct bpf_offloaded_map, map); 110 return container_of(map, struct bpf_offloaded_map, map);
111} 111}
112 112
113static inline bool bpf_map_offload_neutral(const struct bpf_map *map)
114{
115 return map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
116}
117
113static inline bool bpf_map_support_seq_show(const struct bpf_map *map) 118static inline bool bpf_map_support_seq_show(const struct bpf_map *map)
114{ 119{
115 return map->ops->map_seq_show_elem && map->ops->map_check_btf; 120 return map->ops->map_seq_show_elem && map->ops->map_check_btf;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1127552c8033..d0d7d9462368 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1719,6 +1719,7 @@ bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
1719{ 1719{
1720 return -ENOTSUPP; 1720 return -ENOTSUPP;
1721} 1721}
1722EXPORT_SYMBOL_GPL(bpf_event_output);
1722 1723
1723/* Always built-in helper functions. */ 1724/* Always built-in helper functions. */
1724const struct bpf_func_proto bpf_tail_call_proto = { 1725const struct bpf_func_proto bpf_tail_call_proto = {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index c9401075b58c..ac747d5cf7c6 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is licensed under the GNU General License Version 2, 4 * This software is licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -474,8 +474,10 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
474 struct bpf_prog_offload *offload; 474 struct bpf_prog_offload *offload;
475 bool ret; 475 bool ret;
476 476
477 if (!bpf_prog_is_dev_bound(prog->aux) || !bpf_map_is_dev_bound(map)) 477 if (!bpf_prog_is_dev_bound(prog->aux))
478 return false; 478 return false;
479 if (!bpf_map_is_dev_bound(map))
480 return bpf_map_offload_neutral(map);
479 481
480 down_read(&bpf_devs_lock); 482 down_read(&bpf_devs_lock);
481 offload = prog->aux->offload; 483 offload = prog->aux->offload;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 263e13ede029..9b87198deea2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -282,6 +282,7 @@ void bpf_map_put(struct bpf_map *map)
282{ 282{
283 __bpf_map_put(map, true); 283 __bpf_map_put(map, true);
284} 284}
285EXPORT_SYMBOL_GPL(bpf_map_put);
285 286
286void bpf_map_put_with_uref(struct bpf_map *map) 287void bpf_map_put_with_uref(struct bpf_map *map)
287{ 288{
@@ -543,6 +544,7 @@ struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
543 atomic_inc(&map->usercnt); 544 atomic_inc(&map->usercnt);
544 return map; 545 return map;
545} 546}
547EXPORT_SYMBOL_GPL(bpf_map_inc);
546 548
547struct bpf_map *bpf_map_get_with_uref(u32 ufd) 549struct bpf_map *bpf_map_get_with_uref(u32 ufd)
548{ 550{
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 37e0affa515e..d5e1a6c4165d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5055,7 +5055,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
5055 /* hold the map. If the program is rejected by verifier, 5055 /* hold the map. If the program is rejected by verifier,
5056 * the map will be released by release_maps() or it 5056 * the map will be released by release_maps() or it
5057 * will be used by the valid program until it's unloaded 5057 * will be used by the valid program until it's unloaded
5058 * and all maps are released in free_bpf_prog_info() 5058 * and all maps are released in free_used_maps()
5059 */ 5059 */
5060 map = bpf_map_inc(map, false); 5060 map = bpf_map_inc(map, false);
5061 if (IS_ERR(map)) { 5061 if (IS_ERR(map)) {
@@ -5741,16 +5741,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
5741 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) 5741 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
5742 env->strict_alignment = true; 5742 env->strict_alignment = true;
5743 5743
5744 ret = replace_map_fd_with_map_ptr(env);
5745 if (ret < 0)
5746 goto skip_full_check;
5747
5744 if (bpf_prog_is_dev_bound(env->prog->aux)) { 5748 if (bpf_prog_is_dev_bound(env->prog->aux)) {
5745 ret = bpf_prog_offload_verifier_prep(env); 5749 ret = bpf_prog_offload_verifier_prep(env);
5746 if (ret) 5750 if (ret)
5747 goto err_unlock; 5751 goto skip_full_check;
5748 } 5752 }
5749 5753
5750 ret = replace_map_fd_with_map_ptr(env);
5751 if (ret < 0)
5752 goto skip_full_check;
5753
5754 env->explored_states = kcalloc(env->prog->len, 5754 env->explored_states = kcalloc(env->prog->len,
5755 sizeof(struct bpf_verifier_state_list *), 5755 sizeof(struct bpf_verifier_state_list *),
5756 GFP_USER); 5756 GFP_USER);
@@ -5821,7 +5821,7 @@ skip_full_check:
5821err_release_maps: 5821err_release_maps:
5822 if (!env->prog->aux->used_maps) 5822 if (!env->prog->aux->used_maps)
5823 /* if we didn't copy map pointers into bpf_prog_info, release 5823 /* if we didn't copy map pointers into bpf_prog_info, release
5824 * them now. Otherwise free_bpf_prog_info() will release them. 5824 * them now. Otherwise free_used_maps() will release them.
5825 */ 5825 */
5826 release_maps(env); 5826 release_maps(env);
5827 *prog = env->prog; 5827 *prog = env->prog;
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 5f512b14bff9..a6258bc8ec4f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -22,17 +22,19 @@ MAP COMMANDS
22============= 22=============
23 23
24| **bpftool** **map { show | list }** [*MAP*] 24| **bpftool** **map { show | list }** [*MAP*]
25| **bpftool** **map dump** *MAP* 25| **bpftool** **map dump** *MAP*
26| **bpftool** **map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*] 26| **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*]
27| **bpftool** **map lookup** *MAP* **key** [**hex**] *BYTES* 27| **bpftool** **map lookup** *MAP* **key** *DATA*
28| **bpftool** **map getnext** *MAP* [**key** [**hex**] *BYTES*] 28| **bpftool** **map getnext** *MAP* [**key** *DATA*]
29| **bpftool** **map delete** *MAP* **key** [**hex**] *BYTES* 29| **bpftool** **map delete** *MAP* **key** *DATA*
30| **bpftool** **map pin** *MAP* *FILE* 30| **bpftool** **map pin** *MAP* *FILE*
31| **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
31| **bpftool** **map help** 32| **bpftool** **map help**
32| 33|
33| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } 34| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
35| *DATA* := { [**hex**] *BYTES* }
34| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } 36| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
35| *VALUE* := { *BYTES* | *MAP* | *PROG* } 37| *VALUE* := { *DATA* | *MAP* | *PROG* }
36| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } 38| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
37 39
38DESCRIPTION 40DESCRIPTION
@@ -48,7 +50,7 @@ DESCRIPTION
48 **bpftool map dump** *MAP* 50 **bpftool map dump** *MAP*
49 Dump all entries in a given *MAP*. 51 Dump all entries in a given *MAP*.
50 52
51 **bpftool map update** *MAP* **key** [**hex**] *BYTES* **value** [**hex**] *VALUE* [*UPDATE_FLAGS*] 53 **bpftool map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*]
52 Update map entry for a given *KEY*. 54 Update map entry for a given *KEY*.
53 55
54 *UPDATE_FLAGS* can be one of: **any** update existing entry 56 *UPDATE_FLAGS* can be one of: **any** update existing entry
@@ -61,13 +63,13 @@ DESCRIPTION
61 the bytes are parsed as decimal values, unless a "0x" prefix 63 the bytes are parsed as decimal values, unless a "0x" prefix
62 (for hexadecimal) or a "0" prefix (for octal) is provided. 64 (for hexadecimal) or a "0" prefix (for octal) is provided.
63 65
64 **bpftool map lookup** *MAP* **key** [**hex**] *BYTES* 66 **bpftool map lookup** *MAP* **key** *DATA*
65 Lookup **key** in the map. 67 Lookup **key** in the map.
66 68
67 **bpftool map getnext** *MAP* [**key** [**hex**] *BYTES*] 69 **bpftool map getnext** *MAP* [**key** *DATA*]
68 Get next key. If *key* is not specified, get first key. 70 Get next key. If *key* is not specified, get first key.
69 71
70 **bpftool map delete** *MAP* **key** [**hex**] *BYTES* 72 **bpftool map delete** *MAP* **key** *DATA*
71 Remove entry from the map. 73 Remove entry from the map.
72 74
73 **bpftool map pin** *MAP* *FILE* 75 **bpftool map pin** *MAP* *FILE*
@@ -75,6 +77,22 @@ DESCRIPTION
75 77
76 Note: *FILE* must be located in *bpffs* mount. 78 Note: *FILE* must be located in *bpffs* mount.
77 79
80 **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
81 Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
82
83 Install perf rings into a perf event array map and dump
84 output of any bpf_perf_event_output() call in the kernel.
85 By default read the number of CPUs on the system and
86 install perf ring for each CPU in the corresponding index
87 in the array.
88
89 If **cpu** and **index** are specified, install perf ring
90 for given **cpu** at **index** in the array (single ring).
91
92 Note that installing a perf ring into an array will silently
93 replace any existing ring. Any other application will stop
94 receiving events if it installed its rings earlier.
95
78 **bpftool map help** 96 **bpftool map help**
79 Print short help message. 97 Print short help message.
80 98
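
For reference, installing one ring as described above boils down to opening a software PERF_COUNT_SW_BPF_OUTPUT perf event on the target CPU, mmap()ing its ring, and storing the perf fd at the chosen index of the perf event array map. A rough sketch using libbpf's bpf_map_update_elem(); the function and structure are illustrative, not the actual bpftool implementation:

#include <linux/bpf.h>
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <bpf/bpf.h>	/* libbpf; header path may differ by setup */

/* Install a perf ring for one CPU into slot 'index' of a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY map (illustrative sketch only).
 */
static int install_ring(int map_fd, unsigned int cpu, unsigned int index,
			size_t mmap_size, void **ring_mem)
{
	struct perf_event_attr attr = {
		.size		= sizeof(attr),
		.type		= PERF_TYPE_SOFTWARE,
		.config		= PERF_COUNT_SW_BPF_OUTPUT,
		.sample_type	= PERF_SAMPLE_RAW,
		.wakeup_events	= 1,
	};
	int fd;

	fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, cpu,
		     -1 /* group_fd */, 0);
	if (fd < 0)
		return -1;

	*ring_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
	if (*ring_mem == MAP_FAILED)
		return -1;

	/* Pointing the slot at this ring silently replaces any ring that
	 * was installed there before, as the documentation above warns.
	 */
	return bpf_map_update_elem(map_fd, &index, &fd, BPF_ANY);
}
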
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 20689a321ffe..564cb0d9692b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -23,7 +23,7 @@ SYNOPSIS
23 23
24 *MAP-COMMANDS* := 24 *MAP-COMMANDS* :=
25 { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete** 25 { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
26 | **pin** | **help** } 26 | **pin** | **event_pipe** | **help** }
27 27
28 *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** 28 *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
29 | **load** | **help** } 29 | **load** | **help** }
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 4e69782c4a79..892dbf095bff 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -39,7 +39,12 @@ CC = gcc
39 39
40CFLAGS += -O2 40CFLAGS += -O2
41CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers 41CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
42CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/ 42CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
43 -I$(srctree)/kernel/bpf/ \
44 -I$(srctree)/tools/include \
45 -I$(srctree)/tools/include/uapi \
46 -I$(srctree)/tools/lib/bpf \
47 -I$(srctree)/tools/perf
43CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' 48CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
44LIBS = -lelf -lbfd -lopcodes $(LIBBPF) 49LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
45 50
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 852d84a98acd..b301c9b315f1 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1,6 +1,6 @@
1# bpftool(8) bash completion -*- shell-script -*- 1# bpftool(8) bash completion -*- shell-script -*-
2# 2#
3# Copyright (C) 2017 Netronome Systems, Inc. 3# Copyright (C) 2017-2018 Netronome Systems, Inc.
4# 4#
5# This software is dual licensed under the GNU General License 5# This software is dual licensed under the GNU General License
6# Version 2, June 1991 as shown in the file COPYING in the top-level 6# Version 2, June 1991 as shown in the file COPYING in the top-level
@@ -79,6 +79,14 @@ _bpftool_get_map_ids()
79 command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) 79 command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
80} 80}
81 81
82_bpftool_get_perf_map_ids()
83{
84 COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \
85 command grep -C2 perf_event_array | \
86 command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
87}
88
89
82_bpftool_get_prog_ids() 90_bpftool_get_prog_ids()
83{ 91{
84 COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ 92 COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
@@ -359,10 +367,34 @@ _bpftool()
359 fi 367 fi
360 return 0 368 return 0
361 ;; 369 ;;
370 event_pipe)
371 case $prev in
372 $command)
373 COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
374 return 0
375 ;;
376 id)
377 _bpftool_get_perf_map_ids
378 return 0
379 ;;
380 cpu)
381 return 0
382 ;;
383 index)
384 return 0
385 ;;
386 *)
387 _bpftool_once_attr 'cpu'
388 _bpftool_once_attr 'index'
389 return 0
390 ;;
391 esac
392 ;;
362 *) 393 *)
363 [[ $prev == $object ]] && \ 394 [[ $prev == $object ]] && \
364 COMPREPLY=( $( compgen -W 'delete dump getnext help \ 395 COMPREPLY=( $( compgen -W 'delete dump getnext help \
365 lookup pin show list update' -- "$cur" ) ) 396 lookup pin event_pipe show list update' -- \
397 "$cur" ) )
366 ;; 398 ;;
367 esac 399 esac
368 ;; 400 ;;
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 465995281dcd..32f9e397a6c0 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -33,6 +33,7 @@
33 33
34/* Author: Jakub Kicinski <kubakici@wp.pl> */ 34/* Author: Jakub Kicinski <kubakici@wp.pl> */
35 35
36#include <ctype.h>
36#include <errno.h> 37#include <errno.h>
37#include <fcntl.h> 38#include <fcntl.h>
38#include <fts.h> 39#include <fts.h>
@@ -330,6 +331,16 @@ char *get_fdinfo(int fd, const char *key)
330 return NULL; 331 return NULL;
331} 332}
332 333
334void print_data_json(uint8_t *data, size_t len)
335{
336 unsigned int i;
337
338 jsonw_start_array(json_wtr);
339 for (i = 0; i < len; i++)
340 jsonw_printf(json_wtr, "%d", data[i]);
341 jsonw_end_array(json_wtr);
342}
343
333void print_hex_data_json(uint8_t *data, size_t len) 344void print_hex_data_json(uint8_t *data, size_t len)
334{ 345{
335 unsigned int i; 346 unsigned int i;
@@ -420,6 +431,70 @@ void delete_pinned_obj_table(struct pinned_obj_table *tab)
420 } 431 }
421} 432}
422 433
434unsigned int get_page_size(void)
435{
436 static int result;
437
438 if (!result)
439 result = getpagesize();
440 return result;
441}
442
443unsigned int get_possible_cpus(void)
444{
445 static unsigned int result;
446 char buf[128];
447 long int n;
448 char *ptr;
449 int fd;
450
451 if (result)
452 return result;
453
454 fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
455 if (fd < 0) {
456 p_err("can't open sysfs possible cpus");
457 exit(-1);
458 }
459
460 n = read(fd, buf, sizeof(buf));
461 if (n < 2) {
462 p_err("can't read sysfs possible cpus");
463 exit(-1);
464 }
465 close(fd);
466
467 if (n == sizeof(buf)) {
468 p_err("read sysfs possible cpus overflow");
469 exit(-1);
470 }
471
472 ptr = buf;
473 n = 0;
474 while (*ptr && *ptr != '\n') {
475 unsigned int a, b;
476
477 if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
478 n += b - a + 1;
479
480 ptr = strchr(ptr, '-') + 1;
481 } else if (sscanf(ptr, "%u", &a) == 1) {
482 n++;
483 } else {
484 assert(0);
485 }
486
487 while (isdigit(*ptr))
488 ptr++;
489 if (*ptr == ',')
490 ptr++;
491 }
492
493 result = n;
494
495 return result;
496}
497
423static char * 498static char *
424ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf) 499ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
425{ 500{
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index b8e9584d6246..6173cd997e7a 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -117,14 +117,19 @@ int do_pin_fd(int fd, const char *name);
117 117
118int do_prog(int argc, char **arg); 118int do_prog(int argc, char **arg);
119int do_map(int argc, char **arg); 119int do_map(int argc, char **arg);
120int do_event_pipe(int argc, char **argv);
120int do_cgroup(int argc, char **arg); 121int do_cgroup(int argc, char **arg);
121 122
122int prog_parse_fd(int *argc, char ***argv); 123int prog_parse_fd(int *argc, char ***argv);
124int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
123 125
124void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, 126void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
125 const char *arch); 127 const char *arch);
128void print_data_json(uint8_t *data, size_t len);
126void print_hex_data_json(uint8_t *data, size_t len); 129void print_hex_data_json(uint8_t *data, size_t len);
127 130
131unsigned int get_page_size(void);
132unsigned int get_possible_cpus(void);
128const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino); 133const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
129 134
130#endif 135#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index a6cdb640a0d7..af6766e956ba 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (C) 2017 Netronome Systems, Inc. 2 * Copyright (C) 2017-2018 Netronome Systems, Inc.
3 * 3 *
4 * This software is dual licensed under the GNU General License Version 2, 4 * This software is dual licensed under the GNU General License Version 2,
5 * June 1991 as shown in the file COPYING in the top-level directory of this 5 * June 1991 as shown in the file COPYING in the top-level directory of this
@@ -34,7 +34,6 @@
34/* Author: Jakub Kicinski <kubakici@wp.pl> */ 34/* Author: Jakub Kicinski <kubakici@wp.pl> */
35 35
36#include <assert.h> 36#include <assert.h>
37#include <ctype.h>
38#include <errno.h> 37#include <errno.h>
39#include <fcntl.h> 38#include <fcntl.h>
40#include <stdbool.h> 39#include <stdbool.h>
@@ -69,61 +68,6 @@ static const char * const map_type_name[] = {
69 [BPF_MAP_TYPE_CPUMAP] = "cpumap", 68 [BPF_MAP_TYPE_CPUMAP] = "cpumap",
70}; 69};
71 70
72static unsigned int get_possible_cpus(void)
73{
74 static unsigned int result;
75 char buf[128];
76 long int n;
77 char *ptr;
78 int fd;
79
80 if (result)
81 return result;
82
83 fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
84 if (fd < 0) {
85 p_err("can't open sysfs possible cpus");
86 exit(-1);
87 }
88
89 n = read(fd, buf, sizeof(buf));
90 if (n < 2) {
91 p_err("can't read sysfs possible cpus");
92 exit(-1);
93 }
94 close(fd);
95
96 if (n == sizeof(buf)) {
97 p_err("read sysfs possible cpus overflow");
98 exit(-1);
99 }
100
101 ptr = buf;
102 n = 0;
103 while (*ptr && *ptr != '\n') {
104 unsigned int a, b;
105
106 if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
107 n += b - a + 1;
108
109 ptr = strchr(ptr, '-') + 1;
110 } else if (sscanf(ptr, "%u", &a) == 1) {
111 n++;
112 } else {
113 assert(0);
114 }
115
116 while (isdigit(*ptr))
117 ptr++;
118 if (*ptr == ',')
119 ptr++;
120 }
121
122 result = n;
123
124 return result;
125}
126
127static bool map_is_per_cpu(__u32 type) 71static bool map_is_per_cpu(__u32 type)
128{ 72{
129 return type == BPF_MAP_TYPE_PERCPU_HASH || 73 return type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -186,8 +130,7 @@ static int map_parse_fd(int *argc, char ***argv)
186 return -1; 130 return -1;
187} 131}
188 132
189static int 133int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
190map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
191{ 134{
192 int err; 135 int err;
193 int fd; 136 int fd;
@@ -873,23 +816,25 @@ static int do_help(int argc, char **argv)
873 816
874 fprintf(stderr, 817 fprintf(stderr,
875 "Usage: %s %s { show | list } [MAP]\n" 818 "Usage: %s %s { show | list } [MAP]\n"
876 " %s %s dump MAP\n" 819 " %s %s dump MAP\n"
877 " %s %s update MAP key [hex] BYTES value [hex] VALUE [UPDATE_FLAGS]\n" 820 " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n"
878 " %s %s lookup MAP key [hex] BYTES\n" 821 " %s %s lookup MAP key DATA\n"
879 " %s %s getnext MAP [key [hex] BYTES]\n" 822 " %s %s getnext MAP [key DATA]\n"
880 " %s %s delete MAP key [hex] BYTES\n" 823 " %s %s delete MAP key DATA\n"
881 " %s %s pin MAP FILE\n" 824 " %s %s pin MAP FILE\n"
825 " %s %s event_pipe MAP [cpu N index M]\n"
882 " %s %s help\n" 826 " %s %s help\n"
883 "\n" 827 "\n"
884 " MAP := { id MAP_ID | pinned FILE }\n" 828 " MAP := { id MAP_ID | pinned FILE }\n"
829 " DATA := { [hex] BYTES }\n"
885 " " HELP_SPEC_PROGRAM "\n" 830 " " HELP_SPEC_PROGRAM "\n"
886 " VALUE := { BYTES | MAP | PROG }\n" 831 " VALUE := { DATA | MAP | PROG }\n"
887 " UPDATE_FLAGS := { any | exist | noexist }\n" 832 " UPDATE_FLAGS := { any | exist | noexist }\n"
888 " " HELP_SPEC_OPTIONS "\n" 833 " " HELP_SPEC_OPTIONS "\n"
889 "", 834 "",
890 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], 835 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
891 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], 836 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
892 bin_name, argv[-2], bin_name, argv[-2]); 837 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
893 838
894 return 0; 839 return 0;
895} 840}
@@ -904,6 +849,7 @@ static const struct cmd cmds[] = {
904 { "getnext", do_getnext }, 849 { "getnext", do_getnext },
905 { "delete", do_delete }, 850 { "delete", do_delete },
906 { "pin", do_pin }, 851 { "pin", do_pin },
852 { "event_pipe", do_event_pipe },
907 { 0 } 853 { 0 }
908}; 854};
909 855
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
new file mode 100644
index 000000000000..c5a2ced8552d
--- /dev/null
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -0,0 +1,347 @@
1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (C) 2018 Netronome Systems, Inc. */
3/* This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <errno.h>
8#include <fcntl.h>
9#include <libbpf.h>
10#include <poll.h>
11#include <signal.h>
12#include <stdbool.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <time.h>
17#include <unistd.h>
18#include <linux/bpf.h>
19#include <linux/perf_event.h>
20#include <sys/ioctl.h>
21#include <sys/mman.h>
22#include <sys/syscall.h>
23
24#include <bpf.h>
25#include <perf-sys.h>
26
27#include "main.h"
28
29#define MMAP_PAGE_CNT 16
30
31static bool stop;
32
33struct event_ring_info {
34 int fd;
35 int key;
36 unsigned int cpu;
37 void *mem;
38};
39
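/* For PERF_SAMPLE_RAW events the kernel emits each PERF_RECORD_SAMPLE as a
 * perf_event_header followed by a 32-bit size and then "size" raw bytes,
 * i.e. exactly the buffer handed to bpf_perf_event_output(); the struct
 * below mirrors that layout so records can be read straight off the ring.
 */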
40struct perf_event_sample {
41 struct perf_event_header header;
42 __u32 size;
43 unsigned char data[];
44};
45
46static void int_exit(int signo)
47{
48 fprintf(stderr, "Stopping...\n");
49 stop = true;
50}
51
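/* Print a single record: PERF_RECORD_SAMPLE as a hex dump of its raw data,
 * PERF_RECORD_LOST as a lost-event count, anything else as type and size
 * only; when JSON output is selected the same fields go out as one object.
 */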
52static void
53print_bpf_output(struct event_ring_info *ring, struct perf_event_sample *e)
54{
55 struct {
56 struct perf_event_header header;
57 __u64 id;
58 __u64 lost;
59 } *lost = (void *)e;
60 struct timespec ts;
61
62 if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
63 perror("Can't read clock for timestamp");
64 return;
65 }
66
67 if (json_output) {
68 jsonw_start_object(json_wtr);
69 jsonw_name(json_wtr, "timestamp");
70 jsonw_uint(json_wtr, ts.tv_sec * 1000000000ull + ts.tv_nsec);
71 jsonw_name(json_wtr, "type");
72 jsonw_uint(json_wtr, e->header.type);
73 jsonw_name(json_wtr, "cpu");
74 jsonw_uint(json_wtr, ring->cpu);
75 jsonw_name(json_wtr, "index");
76 jsonw_uint(json_wtr, ring->key);
77 if (e->header.type == PERF_RECORD_SAMPLE) {
78 jsonw_name(json_wtr, "data");
79 print_data_json(e->data, e->size);
80 } else if (e->header.type == PERF_RECORD_LOST) {
81 jsonw_name(json_wtr, "lost");
82 jsonw_start_object(json_wtr);
83 jsonw_name(json_wtr, "id");
84 jsonw_uint(json_wtr, lost->id);
85 jsonw_name(json_wtr, "count");
86 jsonw_uint(json_wtr, lost->lost);
87 jsonw_end_object(json_wtr);
88 }
89 jsonw_end_object(json_wtr);
90 } else {
91 if (e->header.type == PERF_RECORD_SAMPLE) {
92 printf("== @%ld.%ld CPU: %d index: %d =====\n",
93 (long)ts.tv_sec, ts.tv_nsec,
94 ring->cpu, ring->key);
95 fprint_hex(stdout, e->data, e->size, " ");
96 printf("\n");
97 } else if (e->header.type == PERF_RECORD_LOST) {
98 printf("lost %lld events\n", lost->lost);
99 } else {
100 printf("unknown event type=%d size=%d\n",
101 e->header.type, e->header.size);
102 }
103 }
104}
105
106static void
107perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
108{
109 volatile struct perf_event_mmap_page *header = ring->mem;
110 __u64 buffer_size = MMAP_PAGE_CNT * get_page_size();
111 __u64 data_tail = header->data_tail;
112 __u64 data_head = header->data_head;
113 void *base, *begin, *end;
114
115 asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
116 if (data_head == data_tail)
117 return;
118
119 base = ((char *)header) + get_page_size();
120
121 begin = base + data_tail % buffer_size;
122 end = base + data_head % buffer_size;
123
124 while (begin != end) {
125 struct perf_event_sample *e;
126
127 e = begin;
128 if (begin + e->header.size > base + buffer_size) {
129 long len = base + buffer_size - begin;
130
131 if (*buf_len < e->header.size) {
132 free(*buf);
133 *buf = malloc(e->header.size);
134 if (!*buf) {
135 fprintf(stderr,
136 "can't allocate memory");
137 stop = true;
138 return;
139 }
140 *buf_len = e->header.size;
141 }
142
143 memcpy(*buf, begin, len);
144 memcpy(*buf + len, base, e->header.size - len);
145 e = (void *)*buf;
146 begin = base + e->header.size - len;
147 } else if (begin + e->header.size == base + buffer_size) {
148 begin = base;
149 } else {
150 begin += e->header.size;
151 }
152
153 print_bpf_output(ring, e);
154 }
155
156 __sync_synchronize(); /* smp_mb() */
157 header->data_tail = data_head;
158}
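/* Illustrative sketch, not part of the patch (names hypothetical): the
 * wrap-around case above boils down to splitting one record into at most
 * two copies around the end of the ring buffer.
 */
static void ring_copy_record(void *dst, const void *ring, size_t ring_size,
			     size_t offset, size_t rec_size)
{
	size_t until_end = ring_size - offset;	/* bytes before the wrap point */

	if (rec_size <= until_end) {
		/* record is contiguous */
		memcpy(dst, (const char *)ring + offset, rec_size);
	} else {
		/* record straddles the end: tail piece, then the wrapped rest */
		memcpy(dst, (const char *)ring + offset, until_end);
		memcpy((char *)dst + until_end, ring, rec_size - until_end);
	}
}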
159
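/* perf requires the ring mapping to be one metadata page followed by 2^n
 * data pages; MMAP_PAGE_CNT counts only the data pages, hence the "+ 1".
 */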
160static int perf_mmap_size(void)
161{
162 return get_page_size() * (MMAP_PAGE_CNT + 1);
163}
164
165static void *perf_event_mmap(int fd)
166{
167 int mmap_size = perf_mmap_size();
168 void *base;
169
170 base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
171 if (base == MAP_FAILED) {
172 p_err("event mmap failed: %s\n", strerror(errno));
173 return NULL;
174 }
175
176 return base;
177}
178
179static void perf_event_unmap(void *mem)
180{
181 if (munmap(mem, perf_mmap_size()))
182 fprintf(stderr, "Can't unmap ring memory!\n");
183}
184
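/* Open a PERF_COUNT_SW_BPF_OUTPUT software event bound to @cpu, store its fd
 * in the perf event array at @key and enable it, so that program calls to
 * bpf_perf_event_output() targeting that slot land in the ring we mmap.
 */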
185static int bpf_perf_event_open(int map_fd, int key, int cpu)
186{
187 struct perf_event_attr attr = {
188 .sample_type = PERF_SAMPLE_RAW,
189 .type = PERF_TYPE_SOFTWARE,
190 .config = PERF_COUNT_SW_BPF_OUTPUT,
191 };
192 int pmu_fd;
193
194 pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
195 if (pmu_fd < 0) {
196 p_err("failed to open perf event %d for CPU %d", key, cpu);
197 return -1;
198 }
199
200 if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
201 p_err("failed to update map for event %d for CPU %d", key, cpu);
202 goto err_close;
203 }
204 if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
205 p_err("failed to enable event %d for CPU %d", key, cpu);
206 goto err_close;
207 }
208
209 return pmu_fd;
210
211err_close:
212 close(pmu_fd);
213 return -1;
214}
215
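/* Handler for "bpftool map event_pipe MAP [cpu N index M]": open one ring per
 * possible CPU (capped at the map size), or just the requested cpu/index pair,
 * mmap them all, then poll and print incoming records until interrupted.
 */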
216int do_event_pipe(int argc, char **argv)
217{
218 int i, nfds, map_fd, index = -1, cpu = -1;
219 struct bpf_map_info map_info = {};
220 struct event_ring_info *rings;
221 size_t tmp_buf_sz = 0;
222 void *tmp_buf = NULL;
223 struct pollfd *pfds;
224 __u32 map_info_len;
225 bool do_all = true;
226
227 map_info_len = sizeof(map_info);
228 map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
229 if (map_fd < 0)
230 return -1;
231
232 if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
233 p_err("map is not a perf event array");
234 goto err_close_map;
235 }
236
237 while (argc) {
238 if (argc < 2)
239 BAD_ARG();
240
241 if (is_prefix(*argv, "cpu")) {
242 char *endptr;
243
244 NEXT_ARG();
245 cpu = strtoul(*argv, &endptr, 0);
246 if (*endptr) {
247 p_err("can't parse %s as CPU ID", **argv);
248 goto err_close_map;
249 }
250
251 NEXT_ARG();
252 } else if (is_prefix(*argv, "index")) {
253 char *endptr;
254
255 NEXT_ARG();
256 index = strtoul(*argv, &endptr, 0);
257 if (*endptr) {
258 p_err("can't parse %s as index", **argv);
259 goto err_close_map;
260 }
261
262 NEXT_ARG();
263 } else {
264 BAD_ARG();
265 }
266
267 do_all = false;
268 }
269
270 if (!do_all) {
271 if (index == -1 || cpu == -1) {
272 p_err("cpu and index must be specified together");
273 goto err_close_map;
274 }
275
276 nfds = 1;
277 } else {
278 nfds = min(get_possible_cpus(), map_info.max_entries);
279 cpu = 0;
280 index = 0;
281 }
282
283 rings = calloc(nfds, sizeof(rings[0]));
284 if (!rings)
285 goto err_close_map;
286
287 pfds = calloc(nfds, sizeof(pfds[0]));
288 if (!pfds)
289 goto err_free_rings;
290
291 for (i = 0; i < nfds; i++) {
292 rings[i].cpu = cpu + i;
293 rings[i].key = index + i;
294
295 rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
296 rings[i].cpu);
297 if (rings[i].fd < 0)
298 goto err_close_fds_prev;
299
300 rings[i].mem = perf_event_mmap(rings[i].fd);
301 if (!rings[i].mem)
302 goto err_close_fds_current;
303
304 pfds[i].fd = rings[i].fd;
305 pfds[i].events = POLLIN;
306 }
307
308 signal(SIGINT, int_exit);
309 signal(SIGHUP, int_exit);
310 signal(SIGTERM, int_exit);
311
312 if (json_output)
313 jsonw_start_array(json_wtr);
314
315 while (!stop) {
316 poll(pfds, nfds, 200);
317 for (i = 0; i < nfds; i++)
318 perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
319 }
320 free(tmp_buf);
321
322 if (json_output)
323 jsonw_end_array(json_wtr);
324
325 for (i = 0; i < nfds; i++) {
326 perf_event_unmap(rings[i].mem);
327 close(rings[i].fd);
328 }
329 free(pfds);
330 free(rings);
331 close(map_fd);
332
333 return 0;
334
335err_close_fds_prev:
336 while (i--) {
337 perf_event_unmap(rings[i].mem);
338err_close_fds_current:
339 close(rings[i].fd);
340 }
341 free(pfds);
342err_free_rings:
343 free(rings);
344err_close_map:
345 close(map_fd);
346 return -1;
347}
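Once a program is emitting into the map, the reader can be attached with, for example, "bpftool map event_pipe id 42" (map id hypothetical), or narrowed to a single ring by appending "cpu N index M"; records are printed as hex dumps by default and as structured objects when JSON output is requested.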