aboutsummaryrefslogtreecommitdiffstats
path: root/samples
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@plumgrid.com>2015-03-25 15:49:25 -0400
committerIngo Molnar <mingo@kernel.org>2015-04-02 07:25:51 -0400
commit5c7fc2d27d004f28f3a94b35edd40e68f779e35a (patch)
tree97eb95fe90f8902117327038f4e1dadaf5c0ea43 /samples
parentd822a192684912c80950d28a0b7adc96261e957c (diff)
samples/bpf: Add IO latency analysis (iosnoop/heatmap) tool
BPF C program attaches to blk_mq_start_request()/blk_update_request() kprobe events to calculate IO latency.

For every completed block IO event it computes the time delta in nsec and records it in a histogram map: map[log10(delta)*10]++

User space reads this histogram map every 2 seconds and prints it as a 'heatmap' using gray shades of the text terminal. Black spaces have many events and white spaces have very few events. The leftmost space is the smallest latency, the rightmost space is the largest latency in the range.

Usage:
$ sudo ./tracex3
and do 'sudo dd if=/dev/sda of=/dev/null' in another terminal. Observe IO latencies and how different activity (like 'make kernel') affects it.

Similar experiments can be done for network transmit latencies, syscalls, etc.

'-t' flag prints the heatmap using normal ascii characters:
$ sudo ./tracex3 -t
heatmap of IO latency
# - many events with this latency
- few events
|1us |10us |100us |1ms |10ms |100ms |1s |10s
*ooo. *O.#. # 221
. *# . # 125
.. .o#*.. # 55
. . . . .#O # 37
.# # 175
.#*. # 37
# # 199
. . *#*. # 55
*#..* # 42
# # 266
...***Oo#*OO**o#* . # 629
# # 271
. .#o* o.*o* # 221
. . o* *#O.. # 50

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1427312966-8434-9-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'samples')
-rw-r--r-- samples/bpf/Makefile | 4
-rw-r--r-- samples/bpf/tracex3_kern.c | 89
-rw-r--r-- samples/bpf/tracex3_user.c | 150
3 files changed, 243 insertions, 0 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 6dd272143733..dcd850546d52 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -8,6 +8,7 @@ hostprogs-y += sockex1
8hostprogs-y += sockex2 8hostprogs-y += sockex2
9hostprogs-y += tracex1 9hostprogs-y += tracex1
10hostprogs-y += tracex2 10hostprogs-y += tracex2
11hostprogs-y += tracex3
11 12
12test_verifier-objs := test_verifier.o libbpf.o 13test_verifier-objs := test_verifier.o libbpf.o
13test_maps-objs := test_maps.o libbpf.o 14test_maps-objs := test_maps.o libbpf.o
@@ -16,6 +17,7 @@ sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
16sockex2-objs := bpf_load.o libbpf.o sockex2_user.o 17sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
17tracex1-objs := bpf_load.o libbpf.o tracex1_user.o 18tracex1-objs := bpf_load.o libbpf.o tracex1_user.o
18tracex2-objs := bpf_load.o libbpf.o tracex2_user.o 19tracex2-objs := bpf_load.o libbpf.o tracex2_user.o
20tracex3-objs := bpf_load.o libbpf.o tracex3_user.o
19 21
20# Tell kbuild to always build the programs 22# Tell kbuild to always build the programs
21always := $(hostprogs-y) 23always := $(hostprogs-y)
@@ -23,6 +25,7 @@ always += sockex1_kern.o
23always += sockex2_kern.o 25always += sockex2_kern.o
24always += tracex1_kern.o 26always += tracex1_kern.o
25always += tracex2_kern.o 27always += tracex2_kern.o
28always += tracex3_kern.o
26 29
27HOSTCFLAGS += -I$(objtree)/usr/include 30HOSTCFLAGS += -I$(objtree)/usr/include
28 31
@@ -31,6 +34,7 @@ HOSTLOADLIBES_sockex1 += -lelf
31HOSTLOADLIBES_sockex2 += -lelf 34HOSTLOADLIBES_sockex2 += -lelf
32HOSTLOADLIBES_tracex1 += -lelf 35HOSTLOADLIBES_tracex1 += -lelf
33HOSTLOADLIBES_tracex2 += -lelf 36HOSTLOADLIBES_tracex2 += -lelf
37HOSTLOADLIBES_tracex3 += -lelf
34 38
35# point this to your LLVM backend with bpf support 39# point this to your LLVM backend with bpf support
36LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc 40LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
new file mode 100644
index 000000000000..255ff2792366
--- /dev/null
+++ b/samples/bpf/tracex3_kern.c
@@ -0,0 +1,89 @@
1/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <linux/skbuff.h>
8#include <linux/netdevice.h>
9#include <linux/version.h>
10#include <uapi/linux/bpf.h>
11#include "bpf_helpers.h"
12
/* Hash map holding one u64 per in-flight request, keyed by a long.
 * bpf_prog1 stores a bpf_ktime_get_ns() timestamp under the value read
 * from ctx->di; bpf_prog2 looks that entry up and deletes it again.
 * At most 4096 entries can be present at once.
 */
13struct bpf_map_def SEC("maps") my_map = {
14 .type = BPF_MAP_TYPE_HASH,
15 .key_size = sizeof(long),
16 .value_size = sizeof(u64),
17 .max_entries = 4096,
18};
19
20/* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
21 * example will no longer be meaningful
22 */
23SEC("kprobe/blk_mq_start_request")
24int bpf_prog1(struct pt_regs *ctx)
25{
26 long rq = ctx->di;
27 u64 val = bpf_ktime_get_ns();
28
29 bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
30 return 0;
31}
32
/* Integer floor(log2(n)), found by halving the candidate bit range six
 * times (32, 16, 8, 4, 2, 1) with straight-line code and no loop.
 *
 * For n == 0 the running result starts at -1 and no step fires, so the
 * unsigned return value is UINT_MAX; callers must not use that as a
 * shift count.
 */
static unsigned int log2l(unsigned long long n)
{
	int bits = (n == 0) ? -1 : 0;

	if (n >= (1ull << 32)) {
		bits += 32;
		n >>= 32;
	}
	if (n >= (1ull << 16)) {
		bits += 16;
		n >>= 16;
	}
	if (n >= (1ull << 8)) {
		bits += 8;
		n >>= 8;
	}
	if (n >= (1ull << 4)) {
		bits += 4;
		n >>= 4;
	}
	if (n >= (1ull << 2)) {
		bits += 2;
		n >>= 2;
	}
	if (n >= (1ull << 1)) {
		bits += 1;
		n >>= 1;
	}

	return bits;
}
41
42#define SLOTS 100
43
/* Latency histogram: an array map of SLOTS u64 counters indexed by a u32.
 * bpf_prog2 increments the counter whose index is roughly
 * log10(latency in ns) * 10, clamped to the last slot.
 */
44struct bpf_map_def SEC("maps") lat_map = {
45 .type = BPF_MAP_TYPE_ARRAY,
46 .key_size = sizeof(u32),
47 .value_size = sizeof(u64),
48 .max_entries = SLOTS,
49};
50
51SEC("kprobe/blk_update_request")
52int bpf_prog2(struct pt_regs *ctx)
53{
54 long rq = ctx->di;
55 u64 *value, l, base;
56 u32 index;
57
58 value = bpf_map_lookup_elem(&my_map, &rq);
59 if (!value)
60 return 0;
61
62 u64 cur_time = bpf_ktime_get_ns();
63 u64 delta = cur_time - *value;
64
65 bpf_map_delete_elem(&my_map, &rq);
66
67 /* the lines below are computing index = log10(delta)*10
68 * using integer arithmetic
69 * index = 29 ~ 1 usec
70 * index = 59 ~ 1 msec
71 * index = 89 ~ 1 sec
72 * index = 99 ~ 10sec or more
73 * log10(x)*10 = log2(x)*10/log2(10) = log2(x)*3
74 */
75 l = log2l(delta);
76 base = 1ll << l;
77 index = (l * 64 + (delta - base) * 64 / base) * 3 / 64;
78
79 if (index >= SLOTS)
80 index = SLOTS - 1;
81
82 value = bpf_map_lookup_elem(&lat_map, &index);
83 if (value)
84 __sync_fetch_and_add((long *)value, 1);
85
86 return 0;
87}
88char _license[] SEC("license") = "GPL";
89u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
new file mode 100644
index 000000000000..0aaa933ab938
--- /dev/null
+++ b/samples/bpf/tracex3_user.c
@@ -0,0 +1,150 @@
1/* Copyright (c) 2013-2015 PLUMgrid, http://plumgrid.com
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of version 2 of the GNU General Public
5 * License as published by the Free Software Foundation.
6 */
7#include <stdio.h>
8#include <stdlib.h>
9#include <signal.h>
10#include <unistd.h>
11#include <stdbool.h>
12#include <string.h>
13#include <linux/bpf.h>
14#include "libbpf.h"
15#include "bpf_load.h"
16
17#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
18
19#define SLOTS 100
20
21static void clear_stats(int fd)
22{
23 __u32 key;
24 __u64 value = 0;
25
26 for (key = 0; key < SLOTS; key++)
27 bpf_update_elem(fd, &key, &value, BPF_ANY);
28}
29
/* Terminal escape sequences selecting a background colour, one per
 * intensity level.  print_hist() indexes this table with the scaled event
 * count; main() presents color[0] as "few events" and the last entry as
 * "many events".
 */
30const char *color[] = {
31 "\033[48;5;255m",
32 "\033[48;5;252m",
33 "\033[48;5;250m",
34 "\033[48;5;248m",
35 "\033[48;5;246m",
36 "\033[48;5;244m",
37 "\033[48;5;242m",
38 "\033[48;5;240m",
39 "\033[48;5;238m",
40 "\033[48;5;236m",
41 "\033[48;5;234m",
42 "\033[48;5;232m",
43};
/* Number of intensity levels; also bounds the index used for sym[]. */
44const int num_colors = ARRAY_SIZE(color);
45
/* Escape sequence printed after each coloured cell to reset attributes. */
46const char nocolor[] = "\033[00m";
47
/* Text-only (-t) counterpart of color[]: one character per intensity
 * level, indexed with the same value, so it must have at least
 * num_colors entries.
 */
48const char *sym[] = {
49 " ",
50 " ",
51 ".",
52 ".",
53 "*",
54 "*",
55 "o",
56 "o",
57 "O",
58 "O",
59 "#",
60 "#",
61};
62
63bool full_range = false;
64bool text_only = false;
65
66static void print_banner(void)
67{
68 if (full_range)
69 printf("|1ns |10ns |100ns |1us |10us |100us"
70 " |1ms |10ms |100ms |1s |10s\n");
71 else
72 printf("|1us |10us |100us |1ms |10ms "
73 "|100ms |1s |10s\n");
74}
75
76static void print_hist(int fd)
77{
78 __u32 key;
79 __u64 value;
80 __u64 cnt[SLOTS];
81 __u64 max_cnt = 0;
82 __u64 total_events = 0;
83
84 for (key = 0; key < SLOTS; key++) {
85 value = 0;
86 bpf_lookup_elem(fd, &key, &value);
87 cnt[key] = value;
88 total_events += value;
89 if (value > max_cnt)
90 max_cnt = value;
91 }
92 clear_stats(fd);
93 for (key = full_range ? 0 : 29; key < SLOTS; key++) {
94 int c = num_colors * cnt[key] / (max_cnt + 1);
95
96 if (text_only)
97 printf("%s", sym[c]);
98 else
99 printf("%s %s", color[c], nocolor);
100 }
101 printf(" # %lld\n", total_events);
102}
103
104int main(int ac, char **argv)
105{
106 char filename[256];
107 int i;
108
109 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
110
111 if (load_bpf_file(filename)) {
112 printf("%s", bpf_log_buf);
113 return 1;
114 }
115
116 for (i = 1; i < ac; i++) {
117 if (strcmp(argv[i], "-a") == 0) {
118 full_range = true;
119 } else if (strcmp(argv[i], "-t") == 0) {
120 text_only = true;
121 } else if (strcmp(argv[i], "-h") == 0) {
122 printf("Usage:\n"
123 " -a display wider latency range\n"
124 " -t text only\n");
125 return 1;
126 }
127 }
128
129 printf(" heatmap of IO latency\n");
130 if (text_only)
131 printf(" %s", sym[num_colors - 1]);
132 else
133 printf(" %s %s", color[num_colors - 1], nocolor);
134 printf(" - many events with this latency\n");
135
136 if (text_only)
137 printf(" %s", sym[0]);
138 else
139 printf(" %s %s", color[0], nocolor);
140 printf(" - few events\n");
141
142 for (i = 0; ; i++) {
143 if (i % 20 == 0)
144 print_banner();
145 print_hist(map_fd[1]);
146 sleep(2);
147 }
148
149 return 0;
150}