Diffstat (limited to 'kernel/trace/bpf_trace.c')
-rw-r--r--	kernel/trace/bpf_trace.c	222
1 file changed, 222 insertions, 0 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
new file mode 100644
index 000000000000..2d56ce501632
--- /dev/null
+++ b/kernel/trace/bpf_trace.c
@@ -0,0 +1,222 @@
/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include "trace.h"

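/*
 * Per-CPU recursion guard: trace_call_bpf() bumps this counter before
 * running a program and refuses to nest if it is already non-zero.
 */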
static DEFINE_PER_CPU(int, bpf_prog_active);

/**
 * trace_call_bpf - invoke BPF program
 * @prog: BPF program
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	unsigned int ret;

	if (in_nmi()) /* not supported yet */
		return 1;

	preempt_disable();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		ret = 0;
		goto out;
	}

	rcu_read_lock();
	ret = BPF_PROG_RUN(prog, ctx);
	rcu_read_unlock();

 out:
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);
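
/*
 * Typical call site (illustrative sketch, not part of this file; the
 * 'prog' and 'regs' names are assumed): a kprobe handler with a BPF
 * program attached runs it first and skips recording when it returns 0:
 *
 *	if (prog && !trace_call_bpf(prog, regs))
 *		return;
 */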

static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	void *dst = (void *) (long) r1;
	int size = (int) r2;
	void *unsafe_ptr = (void *) (long) r3;

	return probe_kernel_read(dst, unsafe_ptr, size);
}

static const struct bpf_func_proto bpf_probe_read_proto = {
	.func = bpf_probe_read,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_STACK,
	.arg2_type = ARG_CONST_STACK_SIZE,
	.arg3_type = ARG_ANYTHING,
};
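
/*
 * From the program side the argument order follows the proto above:
 * destination on the BPF stack, constant size, then the unsafe kernel
 * pointer. Illustrative sketch (variable names assumed, 'task' being a
 * kernel pointer the program obtained elsewhere):
 *
 *	char comm[16];
 *
 *	bpf_probe_read(&comm, sizeof(comm), &task->comm);
 */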

static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	/* NMI safe access to clock monotonic */
	return ktime_get_mono_fast_ns();
}

static const struct bpf_func_proto bpf_ktime_get_ns_proto = {
	.func = bpf_ktime_get_ns,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
};

/*
 * limited trace_printk()
 * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
 */
static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
{
	char *fmt = (char *) (long) r1;
	int mod[3] = {};
	int fmt_cnt = 0;
	int i;

	/*
	 * bpf_check()->check_func_arg()->check_stack_boundary()
	 * guarantees that fmt points to bpf program stack,
	 * fmt_size bytes of it were initialized and fmt_size > 0
	 */
	if (fmt[--fmt_size] != 0)
		return -EINVAL;

	/* check format string for allowed specifiers */
	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
			return -EINVAL;

		if (fmt[i] != '%')
			continue;

		if (fmt_cnt >= 3)
			return -EINVAL;

		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
		i++;
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		} else if (fmt[i] == 'p') {
			mod[fmt_cnt]++;
			i++;
			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
				return -EINVAL;
			fmt_cnt++;
			continue;
		}

		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		}

		if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
			return -EINVAL;
		fmt_cnt++;
	}

	return __trace_printk(1/* fake ip will not be printed */, fmt,
			      mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
			      mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
			      mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func = bpf_trace_printk,
	.gpl_only = true,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_STACK,
	.arg2_type = ARG_CONST_STACK_SIZE,
};
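
/*
 * What the checker above accepts and rejects (illustration only, not part
 * of the original file):
 *
 *	"pid %d delta %llu\n"	accepted (two specifiers, %d and %llu allowed)
 *	"comm %s\n"		rejected with -EINVAL, %s is not allowed
 *	four or more specifiers	rejected, at most 3 arguments (r3..r5) exist
 */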

static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_probe_read:
		return &bpf_probe_read_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;

	case BPF_FUNC_trace_printk:
		/*
		 * this program might be calling bpf_trace_printk,
		 * so allocate per-cpu printk buffers
		 */
		trace_printk_init_buffers();

		return &bpf_trace_printk_proto;
	default:
		return NULL;
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
{
	/* check bounds */
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;

	/* only read is allowed */
	if (type != BPF_READ)
		return false;

	/* disallow misaligned access */
	if (off % size != 0)
		return false;

	return true;
}
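
/*
 * For kprobe programs the context pointer is the pt_regs of the probed
 * function, so aligned read-only loads like the following pass the check
 * above (illustrative sketch; 'di' is the x86-64 first-argument register
 * field and is only an example):
 *
 *	struct pt_regs *regs = ctx;
 *	long arg1 = regs->di;
 */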

static struct bpf_verifier_ops kprobe_prog_ops = {
	.get_func_proto = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

static struct bpf_prog_type_list kprobe_tl = {
	.ops = &kprobe_prog_ops,
	.type = BPF_PROG_TYPE_KPROBE,
};

static int __init register_kprobe_prog_ops(void)
{
	bpf_register_prog_type(&kprobe_tl);
	return 0;
}
late_initcall(register_kprobe_prog_ops);
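
For reference, a minimal program of the new BPF_PROG_TYPE_KPROBE type could look roughly like the sketch below. This is an illustration only: the SEC(), bpf_ktime_get_ns() and bpf_trace_printk() wrappers are assumed to come from a samples/bpf-style bpf_helpers.h, and the probed function (kfree_skb) and the x86-64 register field are arbitrary choices; none of these names are defined by bpf_trace.c itself.

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"	/* assumed samples/bpf-style helper wrappers */

/* Attach to kfree_skb() (hypothetical target) and log a timestamped line
 * through the limited bpf_trace_printk() implemented above.
 */
SEC("kprobe/kfree_skb")
int trace_kfree_skb(struct pt_regs *ctx)
{
	/* the format string must live on the program stack (ARG_PTR_TO_STACK) */
	char fmt[] = "kfree_skb at %llu ns, skb %p\n";
	unsigned long long now = bpf_ktime_get_ns();

	/* %llu and %p are among the specifiers the checker accepts */
	bpf_trace_printk(fmt, sizeof(fmt), now, (void *) ctx->di);
	return 0;
}

/* the helpers used here are gpl_only, so declare a GPL-compatible license */
char _license[] SEC("license") = "GPL";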