diff options
| author | Daniel Borkmann <daniel@iogearbox.net> | 2019-02-27 11:22:51 -0500 |
|---|---|---|
| committer | Daniel Borkmann <daniel@iogearbox.net> | 2019-02-27 11:22:52 -0500 |
| commit | da4e023e452801204f645a502e87352d7f17689d (patch) | |
| tree | 8676141fa3057f59651204c33860ec24fecf19f9 /kernel | |
| parent | 143bdc2e27b44d2559596424bfb017d578be33eb (diff) | |
| parent | 88ad472b8a4ad2292d11835652462fd9f745245e (diff) | |
Merge branch 'bpf-prog-stats'
Alexei Starovoitov says:
====================
Introduce per program stats to monitor the usage of BPF.
v2->v3:
- rename to run_time_ns/run_cnt everywhere
v1->v2:
- fixed u64 stats on 32-bit archs. Thanks Eric
- use more verbose run_time_ns in json output as suggested by Andrii
- refactored prog_alloc and clarified behavior of stats in subprogs
====================
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/core.c | 31 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 39 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 7 | ||||
| -rw-r--r-- | kernel/sysctl.c | 34 |
4 files changed, 106 insertions, 5 deletions
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ef88b167959d..1c14c347f3cf 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c | |||
| @@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns | |||
| 78 | return NULL; | 78 | return NULL; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) | 81 | struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) |
| 82 | { | 82 | { |
| 83 | gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; | 83 | gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; |
| 84 | struct bpf_prog_aux *aux; | 84 | struct bpf_prog_aux *aux; |
| @@ -104,6 +104,26 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) | |||
| 104 | 104 | ||
| 105 | return fp; | 105 | return fp; |
| 106 | } | 106 | } |
| 107 | |||
| 108 | struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) | ||
| 109 | { | ||
| 110 | gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; | ||
| 111 | struct bpf_prog *prog; | ||
| 112 | |||
| 113 | prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags); | ||
| 114 | if (!prog) | ||
| 115 | return NULL; | ||
| 116 | |||
| 117 | prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); | ||
| 118 | if (!prog->aux->stats) { | ||
| 119 | kfree(prog->aux); | ||
| 120 | vfree(prog); | ||
| 121 | return NULL; | ||
| 122 | } | ||
| 123 | |||
| 124 | u64_stats_init(&prog->aux->stats->syncp); | ||
| 125 | return prog; | ||
| 126 | } | ||
| 107 | EXPORT_SYMBOL_GPL(bpf_prog_alloc); | 127 | EXPORT_SYMBOL_GPL(bpf_prog_alloc); |
| 108 | 128 | ||
| 109 | int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) | 129 | int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) |
| @@ -231,7 +251,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, | |||
| 231 | 251 | ||
| 232 | void __bpf_prog_free(struct bpf_prog *fp) | 252 | void __bpf_prog_free(struct bpf_prog *fp) |
| 233 | { | 253 | { |
| 234 | kfree(fp->aux); | 254 | if (fp->aux) { |
| 255 | free_percpu(fp->aux->stats); | ||
| 256 | kfree(fp->aux); | ||
| 257 | } | ||
| 235 | vfree(fp); | 258 | vfree(fp); |
| 236 | } | 259 | } |
| 237 | 260 | ||
| @@ -2069,6 +2092,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, | |||
| 2069 | return -EFAULT; | 2092 | return -EFAULT; |
| 2070 | } | 2093 | } |
| 2071 | 2094 | ||
| 2095 | DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); | ||
| 2096 | EXPORT_SYMBOL(bpf_stats_enabled_key); | ||
| 2097 | int sysctl_bpf_stats_enabled __read_mostly; | ||
| 2098 | |||
| 2072 | /* All definitions of tracepoints related to BPF. */ | 2099 | /* All definitions of tracepoints related to BPF. */ |
| 2073 | #define CREATE_TRACE_POINTS | 2100 | #define CREATE_TRACE_POINTS |
| 2074 | #include <linux/bpf_trace.h> | 2101 | #include <linux/bpf_trace.h> |
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ec7c552af76b..174581dfe225 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
| @@ -1283,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) | |||
| 1283 | return 0; | 1283 | return 0; |
| 1284 | } | 1284 | } |
| 1285 | 1285 | ||
| 1286 | static void bpf_prog_get_stats(const struct bpf_prog *prog, | ||
| 1287 | struct bpf_prog_stats *stats) | ||
| 1288 | { | ||
| 1289 | u64 nsecs = 0, cnt = 0; | ||
| 1290 | int cpu; | ||
| 1291 | |||
| 1292 | for_each_possible_cpu(cpu) { | ||
| 1293 | const struct bpf_prog_stats *st; | ||
| 1294 | unsigned int start; | ||
| 1295 | u64 tnsecs, tcnt; | ||
| 1296 | |||
| 1297 | st = per_cpu_ptr(prog->aux->stats, cpu); | ||
| 1298 | do { | ||
| 1299 | start = u64_stats_fetch_begin_irq(&st->syncp); | ||
| 1300 | tnsecs = st->nsecs; | ||
| 1301 | tcnt = st->cnt; | ||
| 1302 | } while (u64_stats_fetch_retry_irq(&st->syncp, start)); | ||
| 1303 | nsecs += tnsecs; | ||
| 1304 | cnt += tcnt; | ||
| 1305 | } | ||
| 1306 | stats->nsecs = nsecs; | ||
| 1307 | stats->cnt = cnt; | ||
| 1308 | } | ||
| 1309 | |||
| 1286 | #ifdef CONFIG_PROC_FS | 1310 | #ifdef CONFIG_PROC_FS |
| 1287 | static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) | 1311 | static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) |
| 1288 | { | 1312 | { |
| 1289 | const struct bpf_prog *prog = filp->private_data; | 1313 | const struct bpf_prog *prog = filp->private_data; |
| 1290 | char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; | 1314 | char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; |
| 1315 | struct bpf_prog_stats stats; | ||
| 1291 | 1316 | ||
| 1317 | bpf_prog_get_stats(prog, &stats); | ||
| 1292 | bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); | 1318 | bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); |
| 1293 | seq_printf(m, | 1319 | seq_printf(m, |
| 1294 | "prog_type:\t%u\n" | 1320 | "prog_type:\t%u\n" |
| 1295 | "prog_jited:\t%u\n" | 1321 | "prog_jited:\t%u\n" |
| 1296 | "prog_tag:\t%s\n" | 1322 | "prog_tag:\t%s\n" |
| 1297 | "memlock:\t%llu\n" | 1323 | "memlock:\t%llu\n" |
| 1298 | "prog_id:\t%u\n", | 1324 | "prog_id:\t%u\n" |
| 1325 | "run_time_ns:\t%llu\n" | ||
| 1326 | "run_cnt:\t%llu\n", | ||
| 1299 | prog->type, | 1327 | prog->type, |
| 1300 | prog->jited, | 1328 | prog->jited, |
| 1301 | prog_tag, | 1329 | prog_tag, |
| 1302 | prog->pages * 1ULL << PAGE_SHIFT, | 1330 | prog->pages * 1ULL << PAGE_SHIFT, |
| 1303 | prog->aux->id); | 1331 | prog->aux->id, |
| 1332 | stats.nsecs, | ||
| 1333 | stats.cnt); | ||
| 1304 | } | 1334 | } |
| 1305 | #endif | 1335 | #endif |
| 1306 | 1336 | ||
| @@ -2122,6 +2152,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, | |||
| 2122 | struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); | 2152 | struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); |
| 2123 | struct bpf_prog_info info = {}; | 2153 | struct bpf_prog_info info = {}; |
| 2124 | u32 info_len = attr->info.info_len; | 2154 | u32 info_len = attr->info.info_len; |
| 2155 | struct bpf_prog_stats stats; | ||
| 2125 | char __user *uinsns; | 2156 | char __user *uinsns; |
| 2126 | u32 ulen; | 2157 | u32 ulen; |
| 2127 | int err; | 2158 | int err; |
| @@ -2161,6 +2192,10 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, | |||
| 2161 | if (err) | 2192 | if (err) |
| 2162 | return err; | 2193 | return err; |
| 2163 | 2194 | ||
| 2195 | bpf_prog_get_stats(prog, &stats); | ||
| 2196 | info.run_time_ns = stats.nsecs; | ||
| 2197 | info.run_cnt = stats.cnt; | ||
| 2198 | |||
| 2164 | if (!capable(CAP_SYS_ADMIN)) { | 2199 | if (!capable(CAP_SYS_ADMIN)) { |
| 2165 | info.jited_prog_len = 0; | 2200 | info.jited_prog_len = 0; |
| 2166 | info.xlated_prog_len = 0; | 2201 | info.xlated_prog_len = 0; |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1b9496c41383..0e4edd7e3c5f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
| @@ -7320,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) | |||
| 7320 | subprog_end = env->subprog_info[i + 1].start; | 7320 | subprog_end = env->subprog_info[i + 1].start; |
| 7321 | 7321 | ||
| 7322 | len = subprog_end - subprog_start; | 7322 | len = subprog_end - subprog_start; |
| 7323 | func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); | 7323 | /* BPF_PROG_RUN doesn't call subprogs directly, |
| 7324 | * hence main prog stats include the runtime of subprogs. | ||
| 7325 | * subprogs don't have IDs and not reachable via prog_get_next_id | ||
| 7326 | * func[i]->aux->stats will never be accessed and stays NULL | ||
| 7327 | */ | ||
| 7328 | func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER); | ||
| 7324 | if (!func[i]) | 7329 | if (!func[i]) |
| 7325 | goto out_free; | 7330 | goto out_free; |
| 7326 | memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], | 7331 | memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ba4d9e85feb8..86e0771352f2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -224,6 +224,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, | |||
| 224 | #endif | 224 | #endif |
| 225 | static int proc_dopipe_max_size(struct ctl_table *table, int write, | 225 | static int proc_dopipe_max_size(struct ctl_table *table, int write, |
| 226 | void __user *buffer, size_t *lenp, loff_t *ppos); | 226 | void __user *buffer, size_t *lenp, loff_t *ppos); |
| 227 | static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, | ||
| 228 | void __user *buffer, size_t *lenp, | ||
| 229 | loff_t *ppos); | ||
| 227 | 230 | ||
| 228 | #ifdef CONFIG_MAGIC_SYSRQ | 231 | #ifdef CONFIG_MAGIC_SYSRQ |
| 229 | /* Note: sysrq code uses its own private copy */ | 232 | /* Note: sysrq code uses its own private copy */ |
| @@ -1230,6 +1233,15 @@ static struct ctl_table kern_table[] = { | |||
| 1230 | .extra2 = &one, | 1233 | .extra2 = &one, |
| 1231 | }, | 1234 | }, |
| 1232 | #endif | 1235 | #endif |
| 1236 | { | ||
| 1237 | .procname = "bpf_stats_enabled", | ||
| 1238 | .data = &sysctl_bpf_stats_enabled, | ||
| 1239 | .maxlen = sizeof(sysctl_bpf_stats_enabled), | ||
| 1240 | .mode = 0644, | ||
| 1241 | .proc_handler = proc_dointvec_minmax_bpf_stats, | ||
| 1242 | .extra1 = &zero, | ||
| 1243 | .extra2 = &one, | ||
| 1244 | }, | ||
| 1233 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) | 1245 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) |
| 1234 | { | 1246 | { |
| 1235 | .procname = "panic_on_rcu_stall", | 1247 | .procname = "panic_on_rcu_stall", |
| @@ -3260,6 +3272,28 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, | |||
| 3260 | 3272 | ||
| 3261 | #endif /* CONFIG_PROC_SYSCTL */ | 3273 | #endif /* CONFIG_PROC_SYSCTL */ |
| 3262 | 3274 | ||
| 3275 | static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, | ||
| 3276 | void __user *buffer, size_t *lenp, | ||
| 3277 | loff_t *ppos) | ||
| 3278 | { | ||
| 3279 | int ret, bpf_stats = *(int *)table->data; | ||
| 3280 | struct ctl_table tmp = *table; | ||
| 3281 | |||
| 3282 | if (write && !capable(CAP_SYS_ADMIN)) | ||
| 3283 | return -EPERM; | ||
| 3284 | |||
| 3285 | tmp.data = &bpf_stats; | ||
| 3286 | ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); | ||
| 3287 | if (write && !ret) { | ||
| 3288 | *(int *)table->data = bpf_stats; | ||
| 3289 | if (bpf_stats) | ||
| 3290 | static_branch_enable(&bpf_stats_enabled_key); | ||
| 3291 | else | ||
| 3292 | static_branch_disable(&bpf_stats_enabled_key); | ||
| 3293 | } | ||
| 3294 | return ret; | ||
| 3295 | } | ||
| 3296 | |||
| 3263 | /* | 3297 | /* |
| 3264 | * No sense putting this after each symbol definition, twice, | 3298 | * No sense putting this after each symbol definition, twice, |
| 3265 | * exception granted :-) | 3299 | * exception granted :-) |
