aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sysctl.c
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2019-02-25 17:28:39 -0500
committerDaniel Borkmann <daniel@iogearbox.net>2019-02-27 11:22:50 -0500
commit492ecee892c2a4ba6a14903d5d586ff750b7e805 (patch)
tree6161a74e75b41fdb94944cd5451e8589da3cab7c /kernel/sysctl.c
parent143bdc2e27b44d2559596424bfb017d578be33eb (diff)
bpf: enable program stats
JITed BPF programs are indistinguishable from kernel functions, but unlike kernel code, BPF code can be changed often. The typical approach of "perf record" + "perf report" profiling and tuning of kernel code works just as well for BPF programs, but kernel code doesn't need to be monitored whereas BPF programs do. Users load and run a large number of BPF programs. These BPF stats allow tools to monitor the usage of BPF on the server. The monitoring tools will turn sysctl kernel.bpf_stats_enabled on and off for a few seconds to sample the average cost of the programs. Aggregated data over hours and days will provide insight into the cost of BPF, and alarms can trigger in case a given program suddenly gets more expensive. The cost of two sched_clock() calls per program invocation adds ~20 nsec. Fast BPF progs (like selftests/bpf/progs/test_pkt_access.c) will slow down from ~10 nsec to ~30 nsec. static_key minimizes the cost of the stats collection. There is no measurable difference before/after this patch with kernel.bpf_stats_enabled=0. Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel/sysctl.c')
-rw-r--r--kernel/sysctl.c34
1 files changed, 34 insertions, 0 deletions
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ba4d9e85feb8..86e0771352f2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -224,6 +224,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
224#endif 224#endif
225static int proc_dopipe_max_size(struct ctl_table *table, int write, 225static int proc_dopipe_max_size(struct ctl_table *table, int write,
226 void __user *buffer, size_t *lenp, loff_t *ppos); 226 void __user *buffer, size_t *lenp, loff_t *ppos);
227static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
228 void __user *buffer, size_t *lenp,
229 loff_t *ppos);
227 230
228#ifdef CONFIG_MAGIC_SYSRQ 231#ifdef CONFIG_MAGIC_SYSRQ
229/* Note: sysrq code uses its own private copy */ 232/* Note: sysrq code uses its own private copy */
@@ -1230,6 +1233,15 @@ static struct ctl_table kern_table[] = {
1230 .extra2 = &one, 1233 .extra2 = &one,
1231 }, 1234 },
1232#endif 1235#endif
1236 {
1237 .procname = "bpf_stats_enabled",
1238 .data = &sysctl_bpf_stats_enabled,
1239 .maxlen = sizeof(sysctl_bpf_stats_enabled),
1240 .mode = 0644,
1241 .proc_handler = proc_dointvec_minmax_bpf_stats,
1242 .extra1 = &zero,
1243 .extra2 = &one,
1244 },
1233#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) 1245#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1234 { 1246 {
1235 .procname = "panic_on_rcu_stall", 1247 .procname = "panic_on_rcu_stall",
@@ -3260,6 +3272,28 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3260 3272
3261#endif /* CONFIG_PROC_SYSCTL */ 3273#endif /* CONFIG_PROC_SYSCTL */
3262 3274
3275static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
3276 void __user *buffer, size_t *lenp,
3277 loff_t *ppos)
3278{
3279 int ret, bpf_stats = *(int *)table->data;
3280 struct ctl_table tmp = *table;
3281
3282 if (write && !capable(CAP_SYS_ADMIN))
3283 return -EPERM;
3284
3285 tmp.data = &bpf_stats;
3286 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
3287 if (write && !ret) {
3288 *(int *)table->data = bpf_stats;
3289 if (bpf_stats)
3290 static_branch_enable(&bpf_stats_enabled_key);
3291 else
3292 static_branch_disable(&bpf_stats_enabled_key);
3293 }
3294 return ret;
3295}
3296
3263/* 3297/*
3264 * No sense putting this after each symbol definition, twice, 3298 * No sense putting this after each symbol definition, twice,
3265 * exception granted :-) 3299 * exception granted :-)