author:    Alexei Starovoitov <ast@kernel.org>  2019-02-25 17:28:39 -0500
committer: Daniel Borkmann <daniel@iogearbox.net>  2019-02-27 11:22:50 -0500
commit:    492ecee892c2a4ba6a14903d5d586ff750b7e805
tree:      6161a74e75b41fdb94944cd5451e8589da3cab7c
parent:    143bdc2e27b44d2559596424bfb017d578be33eb
bpf: enable program stats
JITed BPF programs are indistinguishable from kernel functions, but unlike
kernel code, BPF code can be changed often. The typical "perf record" +
"perf report" approach to profiling and tuning kernel code works just as
well for BPF programs, but kernel code doesn't need to be monitored,
whereas BPF programs do. Users load and run a large number of BPF
programs. These BPF stats allow tools to monitor the usage of BPF on a
server. The monitoring tools will turn the sysctl kernel.bpf_stats_enabled
on and off for a few seconds at a time to sample the average cost of the
programs. Data aggregated over hours and days will provide insight into
the cost of BPF, and alarms can trigger if a given program suddenly gets
more expensive.

The two sched_clock() calls per program invocation add ~20 nsec: fast BPF
progs (like selftests/bpf/progs/test_pkt_access.c) slow down from ~10 nsec
to ~30 nsec. A static_key minimizes the cost of the stats collection;
there is no measurable difference before/after this patch with
kernel.bpf_stats_enabled=0.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
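Editor's note: the dispatch-side change that does the actual accounting --
bracketing each program run with two sched_clock() reads behind the static
key -- lives in include/linux/filter.h and is therefore outside this
diffstat, which is limited to kernel/bpf. The following is a sketch of
that pattern reconstructed from the commit message, not a quote of the
patch; the struct layout and macro body are assumptions.

/* Sketch only: the real hunk is in include/linux/filter.h, not shown
 * in this kernel/bpf-limited diff. */
struct bpf_prog_stats {
        u64 nsecs;                      /* cumulative run time */
        u64 cnt;                        /* cumulative run count */
        struct u64_stats_sync syncp;    /* consistent 64-bit reads on 32-bit */
};

#define BPF_PROG_RUN(prog, ctx) ({                                      \
        u32 ret;                                                        \
        if (static_branch_unlikely(&bpf_stats_enabled_key)) {           \
                /* stats on: time the run with two sched_clock() */     \
                struct bpf_prog_stats *stats;                           \
                u64 start = sched_clock();                              \
                                                                        \
                ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);         \
                stats = this_cpu_ptr(prog->aux->stats);                 \
                u64_stats_update_begin(&stats->syncp);                  \
                stats->cnt++;                                           \
                stats->nsecs += sched_clock() - start;                  \
                u64_stats_update_end(&stats->syncp);                    \
        } else {                                                        \
                /* stats off: the static key keeps this path cost-free */\
                ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);         \
        }                                                               \
        ret; })

Keeping the counters in a per-CPU struct guarded by u64_stats_sync limits
the hot path to two clock reads and a plain increment, with no atomics or
shared cache lines.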
Diffstat (limited to 'kernel/bpf')
-rw-r--r--  kernel/bpf/core.c     | 31
-rw-r--r--  kernel/bpf/syscall.c  | 34
-rw-r--r--  kernel/bpf/verifier.c |  7
3 files changed, 67 insertions(+), 5 deletions(-)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ef88b167959d..1c14c347f3cf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
 	return NULL;
 }
 
-struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
 {
 	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
 	struct bpf_prog_aux *aux;
@@ -104,6 +104,26 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 
 	return fp;
 }
+
+struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
+{
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
+	struct bpf_prog *prog;
+
+	prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
+	if (!prog)
+		return NULL;
+
+	prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
+	if (!prog->aux->stats) {
+		kfree(prog->aux);
+		vfree(prog);
+		return NULL;
+	}
+
+	u64_stats_init(&prog->aux->stats->syncp);
+	return prog;
+}
 EXPORT_SYMBOL_GPL(bpf_prog_alloc);
 
 int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
@@ -231,7 +251,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 
 void __bpf_prog_free(struct bpf_prog *fp)
 {
-	kfree(fp->aux);
+	if (fp->aux) {
+		free_percpu(fp->aux->stats);
+		kfree(fp->aux);
+	}
 	vfree(fp);
 }
 
@@ -2069,6 +2092,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
 	return -EFAULT;
 }
 
+DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
+EXPORT_SYMBOL(bpf_stats_enabled_key);
+int sysctl_bpf_stats_enabled __read_mostly;
+
 /* All definitions of tracepoints related to BPF. */
 #define CREATE_TRACE_POINTS
 #include <linux/bpf_trace.h>
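Editor's note: sysctl_bpf_stats_enabled is only declared here; the handler
that flips bpf_stats_enabled_key when kernel.bpf_stats_enabled is written
belongs to kernel/sysctl.c, again outside this diffstat. A hedged sketch
of such a handler follows -- the name proc_bpf_stats_enabled and the exact
validation are assumptions, not the patch's code.

/* Sketch only: validate the write, then toggle the static key to
 * match the new sysctl value. */
static int proc_bpf_stats_enabled(struct ctl_table *table, int write,
				  void __user *buffer, size_t *lenp,
				  loff_t *ppos)
{
	int ret, val = *(int *)table->data;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &val;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret && *(int *)table->data != val) {
		*(int *)table->data = val;
		if (val)
			static_branch_enable(&bpf_stats_enabled_key);
		else
			static_branch_disable(&bpf_stats_enabled_key);
	}
	return ret;
}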
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index ec7c552af76b..31cf66fc3f5c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1283,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static void bpf_prog_get_stats(const struct bpf_prog *prog,
+			       struct bpf_prog_stats *stats)
+{
+	u64 nsecs = 0, cnt = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		const struct bpf_prog_stats *st;
+		unsigned int start;
+		u64 tnsecs, tcnt;
+
+		st = per_cpu_ptr(prog->aux->stats, cpu);
+		do {
+			start = u64_stats_fetch_begin_irq(&st->syncp);
+			tnsecs = st->nsecs;
+			tcnt = st->cnt;
+		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
+		nsecs += tnsecs;
+		cnt += tcnt;
+	}
+	stats->nsecs = nsecs;
+	stats->cnt = cnt;
+}
+
 #ifdef CONFIG_PROC_FS
 static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 {
 	const struct bpf_prog *prog = filp->private_data;
 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+	struct bpf_prog_stats stats;
 
+	bpf_prog_get_stats(prog, &stats);
 	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 	seq_printf(m,
 		   "prog_type:\t%u\n"
 		   "prog_jited:\t%u\n"
 		   "prog_tag:\t%s\n"
 		   "memlock:\t%llu\n"
-		   "prog_id:\t%u\n",
+		   "prog_id:\t%u\n"
+		   "run_time_ns:\t%llu\n"
+		   "run_cnt:\t%llu\n",
 		   prog->type,
 		   prog->jited,
 		   prog_tag,
 		   prog->pages * 1ULL << PAGE_SHIFT,
-		   prog->aux->id);
+		   prog->aux->id,
+		   stats.nsecs,
+		   stats.cnt);
 }
 #endif
 
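Editor's note: the two new fdinfo keys give monitoring tools a stable text
interface. A minimal userspace reader could parse them as below; this is
illustrative only, not part of the patch, and read_prog_stats() is a
hypothetical helper name.

/* Parse run_time_ns and run_cnt from /proc/<pid>/fdinfo/<fd>. */
#include <stdio.h>
#include <inttypes.h>

static int read_prog_stats(int pid, int fd, uint64_t *ns, uint64_t *cnt)
{
	char path[64], line[128];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", pid, fd);
	f = fopen(path, "r");
	if (!f)
		return -1;

	*ns = *cnt = 0;
	while (fgets(line, sizeof(line), f)) {
		/* Keys match the seq_printf() format in the hunk above. */
		sscanf(line, "run_time_ns:\t%" SCNu64, ns);
		sscanf(line, "run_cnt:\t%" SCNu64, cnt);
	}
	fclose(f);
	return 0;
}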
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1b9496c41383..0e4edd7e3c5f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7320,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		subprog_end = env->subprog_info[i + 1].start;
 
 		len = subprog_end - subprog_start;
-		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+		/* BPF_PROG_RUN doesn't call subprogs directly,
+		 * hence main prog stats include the runtime of subprogs.
+		 * subprogs don't have IDs and not reachable via prog_get_next_id
+		 * func[i]->aux->stats will never be accessed and stays NULL
+		 */
+		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
 		if (!func[i])
 			goto out_free;
 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
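Editor's note: putting the pieces together, the sampling workflow the
commit message describes (enable the sysctl for a few seconds, read the
deltas) could look like this userspace sketch. It reuses the hypothetical
read_prog_stats() helper from above and assumes the caller already holds a
valid BPF prog fd; the function names here are illustrative.

/* Hypothetical sampling loop, not part of the patch: enable stats,
 * wait, then compute average nsec per run from the fdinfo deltas. */
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

/* From the fdinfo reader sketch above (hypothetical helper). */
int read_prog_stats(int pid, int fd, uint64_t *ns, uint64_t *cnt);

static void set_bpf_stats(int on)
{
	FILE *f = fopen("/proc/sys/kernel/bpf_stats_enabled", "w");

	if (f) {
		fprintf(f, "%d\n", on);
		fclose(f);
	}
}

int sample_avg_cost(int pid, int prog_fd, unsigned int seconds)
{
	uint64_t ns0, cnt0, ns1, cnt1;

	set_bpf_stats(1);
	if (read_prog_stats(pid, prog_fd, &ns0, &cnt0))
		return -1;
	sleep(seconds);
	if (read_prog_stats(pid, prog_fd, &ns1, &cnt1))
		return -1;
	set_bpf_stats(0);

	if (cnt1 > cnt0)
		printf("avg cost: %llu nsec/run\n",
		       (unsigned long long)((ns1 - ns0) / (cnt1 - cnt0)));
	return 0;
}

Dividing the run_time_ns delta by the run_cnt delta gives the average cost
per invocation for the sampling window, so stats never need to stay
enabled permanently.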