aboutsummaryrefslogtreecommitdiffstats
path: root/samples/bpf
diff options
context:
space:
mode:
authorDaniel Wagner <daniel.wagner@bmw-carit.de>2015-06-19 10:00:44 -0400
committerDavid S. Miller <davem@davemloft.net>2015-06-23 09:09:58 -0400
commit0fb1170ee68a6aa14eca0666e02c4b62cbf1251d (patch)
treeb75da9716f7f1de36c90b75bd98ca8ff8151e6f2 /samples/bpf
parent7ce42de1895d4787b47b004638d642dcacb464fe (diff)
bpf: BPF based latency tracing
BPF offers another way to generate latency histograms. We attach kprobes at trace_preempt_off and trace_preempt_on and calculate the time elapsed between seeing the off and the on transition. The first array is used to store the start time stamp. The key is the CPU id. The second array stores the log2(time diff). We need to use static allocation here (array and not hash tables). The kprobes hooking into trace_preempt_on|off should not call into any dynamic memory allocation or free path. We need to avoid getting called recursively. Besides that, it reduces jitter in the measurement. CPU 0 latency : count distribution 1 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 166723 |*************************************** | 4096 -> 8191 : 19870 |*** | 8192 -> 16383 : 6324 | | 16384 -> 32767 : 1098 | | 32768 -> 65535 : 190 | | 65536 -> 131071 : 179 | | 131072 -> 262143 : 18 | | 262144 -> 524287 : 4 | | 524288 -> 1048575 : 1363 | | CPU 1 latency : count distribution 1 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 114042 |*************************************** | 4096 -> 8191 : 9587 |** | 8192 -> 16383 : 4140 | | 16384 -> 32767 : 673 | | 32768 -> 65535 : 179 | | 65536 -> 131071 : 29 | | 131072 -> 262143 : 4 | | 262144 -> 524287 : 1 | | 524288 -> 1048575 : 364 | | CPU 2 latency : count distribution 1 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 40147 |*************************************** | 4096 -> 8191 : 2300 |* | 8192 -> 16383 : 828 | | 16384 -> 32767 : 178 | | 32768 -> 65535 : 59 | | 65536 -> 131071 : 2 | | 131072 
-> 262143 : 0 | | 262144 -> 524287 : 1 | | 524288 -> 1048575 : 174 | | CPU 3 latency : count distribution 1 -> 1 : 0 | | 2 -> 3 : 0 | | 4 -> 7 : 0 | | 8 -> 15 : 0 | | 16 -> 31 : 0 | | 32 -> 63 : 0 | | 64 -> 127 : 0 | | 128 -> 255 : 0 | | 256 -> 511 : 0 | | 512 -> 1023 : 0 | | 1024 -> 2047 : 0 | | 2048 -> 4095 : 29626 |*************************************** | 4096 -> 8191 : 2704 |** | 8192 -> 16383 : 1090 | | 16384 -> 32767 : 160 | | 32768 -> 65535 : 72 | | 65536 -> 131071 : 32 | | 131072 -> 262143 : 26 | | 262144 -> 524287 : 12 | | 524288 -> 1048575 : 298 | | All this is based on the trace3 examples written by Alexei Starovoitov <ast@plumgrid.com>. Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: Alexei Starovoitov <ast@plumgrid.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Daniel Borkmann <daniel@iogearbox.net> Cc: Ingo Molnar <mingo@kernel.org> Cc: linux-kernel@vger.kernel.org Cc: netdev@vger.kernel.org Acked-by: Alexei Starovoitov <ast@plumgrid.com> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'samples/bpf')
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/lathist_kern.c99
-rw-r--r--samples/bpf/lathist_user.c103
3 files changed, 206 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 46c6a8cf74d3..4450fed91ab4 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -12,6 +12,7 @@ hostprogs-y += tracex2
12hostprogs-y += tracex3 12hostprogs-y += tracex3
13hostprogs-y += tracex4 13hostprogs-y += tracex4
14hostprogs-y += tracex5 14hostprogs-y += tracex5
15hostprogs-y += lathist
15 16
16test_verifier-objs := test_verifier.o libbpf.o 17test_verifier-objs := test_verifier.o libbpf.o
17test_maps-objs := test_maps.o libbpf.o 18test_maps-objs := test_maps.o libbpf.o
@@ -24,6 +25,7 @@ tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
24tracex3-objs := bpf_load.o libbpf.o tracex3_user.o 25tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
25tracex4-objs := bpf_load.o libbpf.o tracex4_user.o 26tracex4-objs := bpf_load.o libbpf.o tracex4_user.o
26tracex5-objs := bpf_load.o libbpf.o tracex5_user.o 27tracex5-objs := bpf_load.o libbpf.o tracex5_user.o
28lathist-objs := bpf_load.o libbpf.o lathist_user.o
27 29
28# Tell kbuild to always build the programs 30# Tell kbuild to always build the programs
29always := $(hostprogs-y) 31always := $(hostprogs-y)
@@ -36,6 +38,7 @@ always += tracex3_kern.o
36always += tracex4_kern.o 38always += tracex4_kern.o
37always += tracex5_kern.o 39always += tracex5_kern.o
38always += tcbpf1_kern.o 40always += tcbpf1_kern.o
41always += lathist_kern.o
39 42
40HOSTCFLAGS += -I$(objtree)/usr/include 43HOSTCFLAGS += -I$(objtree)/usr/include
41 44
@@ -48,6 +51,7 @@ HOSTLOADLIBES_tracex2 += -lelf
48HOSTLOADLIBES_tracex3 += -lelf 51HOSTLOADLIBES_tracex3 += -lelf
49HOSTLOADLIBES_tracex4 += -lelf -lrt 52HOSTLOADLIBES_tracex4 += -lelf -lrt
50HOSTLOADLIBES_tracex5 += -lelf 53HOSTLOADLIBES_tracex5 += -lelf
54HOSTLOADLIBES_lathist += -lelf
51 55
52# point this to your LLVM backend with bpf support 56# point this to your LLVM backend with bpf support
53LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc 57LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/lathist_kern.c b/samples/bpf/lathist_kern.c
new file mode 100644
index 000000000000..18fa088473cd
--- /dev/null
+++ b/samples/bpf/lathist_kern.c
@@ -0,0 +1,99 @@
1/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
2 * Copyright (c) 2015 BMW Car IT GmbH
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 */
8#include <linux/version.h>
9#include <linux/ptrace.h>
10#include <uapi/linux/bpf.h>
11#include "bpf_helpers.h"
12
13#define MAX_ENTRIES 20
14#define MAX_CPU 4
15
16/* We need to stick to static allocated memory (an array instead of
17 * hash table) because managing dynamic memory from the
18 * trace_preempt_[on|off] tracepoints hooks is not supported.
19 */
20
21struct bpf_map_def SEC("maps") my_map = {
22 .type = BPF_MAP_TYPE_ARRAY,
23 .key_size = sizeof(int),
24 .value_size = sizeof(u64),
25 .max_entries = MAX_CPU,
26};
27
28SEC("kprobe/trace_preempt_off")
29int bpf_prog1(struct pt_regs *ctx)
30{
31 int cpu = bpf_get_smp_processor_id();
32 u64 *ts = bpf_map_lookup_elem(&my_map, &cpu);
33
34 if (ts)
35 *ts = bpf_ktime_get_ns();
36
37 return 0;
38}
39
/*
 * Integer base-2 logarithm of a 32-bit value, rounded down.
 * Returns 0 for v == 0 as well as for v == 1.
 *
 * Loop-free: the value is narrowed in steps of 16, 8, 4 and 2 bits and
 * the shift amounts taken are accumulated into the result.
 */
static unsigned int log2(unsigned int v)
{
	unsigned int result;
	unsigned int step;

	result = (v > 0xFFFF) << 4;
	v >>= result;

	step = (v > 0xFF) << 3;
	v >>= step;
	result |= step;

	step = (v > 0xF) << 2;
	v >>= step;
	result |= step;

	step = (v > 0x3) << 1;
	v >>= step;
	result |= step;

	/* v is now in 0..3; its upper bit is the last result bit */
	result |= (v >> 1);

	return result;
}
53
/*
 * Integer base-2 logarithm of v, rounded down, built on the 32-bit
 * log2(): when any bit above bit 31 is set the upper word decides the
 * result, otherwise the lower word does.
 */
static unsigned int log2l(unsigned long v)
{
	unsigned int upper = v >> 32;

	return upper ? log2(upper) + 32 : log2(v);
}
63
64struct bpf_map_def SEC("maps") my_lat = {
65 .type = BPF_MAP_TYPE_ARRAY,
66 .key_size = sizeof(int),
67 .value_size = sizeof(long),
68 .max_entries = MAX_CPU * MAX_ENTRIES,
69};
70
71SEC("kprobe/trace_preempt_on")
72int bpf_prog2(struct pt_regs *ctx)
73{
74 u64 *ts, cur_ts, delta;
75 int key, cpu;
76 long *val;
77
78 cpu = bpf_get_smp_processor_id();
79 ts = bpf_map_lookup_elem(&my_map, &cpu);
80 if (!ts)
81 return 0;
82
83 cur_ts = bpf_ktime_get_ns();
84 delta = log2l(cur_ts - *ts);
85
86 if (delta > MAX_ENTRIES - 1)
87 delta = MAX_ENTRIES - 1;
88
89 key = cpu * MAX_ENTRIES + delta;
90 val = bpf_map_lookup_elem(&my_lat, &key);
91 if (val)
92 __sync_fetch_and_add((long *)val, 1);
93
94 return 0;
95
96}
97
98char _license[] SEC("license") = "GPL";
99u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/lathist_user.c b/samples/bpf/lathist_user.c
new file mode 100644
index 000000000000..65da8c1576de
--- /dev/null
+++ b/samples/bpf/lathist_user.c
@@ -0,0 +1,103 @@
1/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
2 * Copyright (c) 2015 BMW Car IT GmbH
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 */
8#include <stdio.h>
9#include <unistd.h>
10#include <stdlib.h>
11#include <signal.h>
12#include <linux/bpf.h>
13#include "libbpf.h"
14#include "bpf_load.h"
15
16#define MAX_ENTRIES 20
17#define MAX_CPU 4
18#define MAX_STARS 40
19
20struct cpu_hist {
21 long data[MAX_ENTRIES];
22 long max;
23};
24
25static struct cpu_hist cpu_hist[MAX_CPU];
26
/*
 * Render a histogram bar for val, scaled against max, into str.
 *
 * @str:   output buffer of at least width bytes
 * @val:   count to draw
 * @max:   count that corresponds to a full-width bar
 * @width: size of str in bytes, including the terminating NUL
 *
 * At most width - 1 characters are written before the NUL. When val
 * exceeds max the last character of the bar is replaced by '+'.
 * A non-positive max produces an empty bar instead of dividing by zero,
 * and a non-positive width leaves str untouched.
 */
static void stars(char *str, long val, long max, int width)
{
	long limit;
	int i;

	if (width <= 0)
		return;

	if (max <= 0) {
		str[0] = '\0';
		return;
	}

	/* loop-invariant: number of '*' the scaled value asks for */
	limit = (width * val / max) - 1;

	for (i = 0; i < limit && i < width - 1; i++)
		str[i] = '*';

	/* only mark overflow if there is a character to overwrite */
	if (val > max && i > 0)
		str[i - 1] = '+';
	str[i] = '\0';
}
37
38static void print_hist(void)
39{
40 char starstr[MAX_STARS];
41 struct cpu_hist *hist;
42 int i, j;
43
44 /* clear screen */
45 printf("\033[2J");
46
47 for (j = 0; j < MAX_CPU; j++) {
48 hist = &cpu_hist[j];
49
50 /* ignore CPUs without data (maybe offline?) */
51 if (hist->max == 0)
52 continue;
53
54 printf("CPU %d\n", j);
55 printf(" latency : count distribution\n");
56 for (i = 1; i <= MAX_ENTRIES; i++) {
57 stars(starstr, hist->data[i - 1], hist->max, MAX_STARS);
58 printf("%8ld -> %-8ld : %-8ld |%-*s|\n",
59 (1l << i) >> 1, (1l << i) - 1,
60 hist->data[i - 1], MAX_STARS, starstr);
61 }
62 }
63}
64
65static void get_data(int fd)
66{
67 long key, value;
68 int c, i;
69
70 for (i = 0; i < MAX_CPU; i++)
71 cpu_hist[i].max = 0;
72
73 for (c = 0; c < MAX_CPU; c++) {
74 for (i = 0; i < MAX_ENTRIES; i++) {
75 key = c * MAX_ENTRIES + i;
76 bpf_lookup_elem(fd, &key, &value);
77
78 cpu_hist[c].data[i] = value;
79 if (value > cpu_hist[c].max)
80 cpu_hist[c].max = value;
81 }
82 }
83}
84
85int main(int argc, char **argv)
86{
87 char filename[256];
88
89 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
90
91 if (load_bpf_file(filename)) {
92 printf("%s", bpf_log_buf);
93 return 1;
94 }
95
96 while (1) {
97 get_data(map_fd[1]);
98 print_hist();
99 sleep(5);
100 }
101
102 return 0;
103}