diff options
author | Feng(Eric) Liu <eric.e.liu@intel.com> | 2008-04-10 08:47:53 -0400 |
---|---|---|
committer | Avi Kivity <avi@qumranet.com> | 2008-04-27 05:01:22 -0400 |
commit | d4c9ff2d1b78e385471b3f4d80c0596909926ef7 (patch) | |
tree | 5551c0f6c56439df39ca434115840f4e36cee2ea | |
parent | 048354c8e6bf95e7347f623d8a0da5b89e216405 (diff) |
KVM: Add kvm trace userspace interface
This interface allows a user space application to read the trace of kvm
related events through relayfs.
Signed-off-by: Feng (Eric) Liu <eric.e.liu@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
-rw-r--r-- | arch/x86/kvm/Kconfig | 11 | ||||
-rw-r--r-- | arch/x86/kvm/Makefile | 3 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 14 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 8 | ||||
-rw-r--r-- | virt/kvm/kvm_trace.c | 276 |
5 files changed, 311 insertions, 1 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 76c70ab44382..8d45fabc5f3b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -50,6 +50,17 @@ config KVM_AMD | |||
50 | Provides support for KVM on AMD processors equipped with the AMD-V | 50 | Provides support for KVM on AMD processors equipped with the AMD-V |
51 | (SVM) extensions. | 51 | (SVM) extensions. |
52 | 52 | ||
53 | config KVM_TRACE | ||
54 | bool "KVM trace support" | ||
55 | depends on KVM && MARKERS && SYSFS | ||
56 | select RELAY | ||
57 | select DEBUG_FS | ||
58 | default n | ||
59 | ---help--- | ||
60 | This option allows reading a trace of kvm-related events through | ||
61 | relayfs. Note the ABI is not considered stable and will be | ||
62 | modified in future updates. | ||
63 | |||
53 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under | 64 | # OK, it's a little counter-intuitive to do this, but it puts it neatly under |
54 | # the virtualization menu. | 65 | # the virtualization menu. |
55 | source drivers/lguest/Kconfig | 66 | source drivers/lguest/Kconfig |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 4d0c22e11f1a..c97d35c218db 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -3,6 +3,9 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) | 5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) |
6 | ifeq ($(CONFIG_KVM_TRACE),y) | ||
7 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) | ||
8 | endif | ||
6 | 9 | ||
7 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 10 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm |
8 | 11 | ||
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 578c3638bbba..bd0c2d2d840f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/preempt.h> | 17 | #include <linux/preempt.h> |
18 | #include <linux/marker.h> | ||
18 | #include <asm/signal.h> | 19 | #include <asm/signal.h> |
19 | 20 | ||
20 | #include <linux/kvm.h> | 21 | #include <linux/kvm.h> |
@@ -309,5 +310,18 @@ struct kvm_stats_debugfs_item { | |||
309 | struct dentry *dentry; | 310 | struct dentry *dentry; |
310 | }; | 311 | }; |
311 | extern struct kvm_stats_debugfs_item debugfs_entries[]; | 312 | extern struct kvm_stats_debugfs_item debugfs_entries[]; |
313 | extern struct dentry *debugfs_dir; | ||
314 | |||
315 | #ifdef CONFIG_KVM_TRACE | ||
316 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg); | ||
317 | void kvm_trace_cleanup(void); | ||
318 | #else | ||
319 | static inline | ||
320 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | ||
321 | { | ||
322 | return -EINVAL; | ||
323 | } | ||
324 | #define kvm_trace_cleanup() ((void)0) | ||
325 | #endif | ||
312 | 326 | ||
313 | #endif | 327 | #endif |
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a52c084e068..d5911d9895c3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c | |||
@@ -60,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache); | |||
60 | 60 | ||
61 | static __read_mostly struct preempt_ops kvm_preempt_ops; | 61 | static __read_mostly struct preempt_ops kvm_preempt_ops; |
62 | 62 | ||
63 | static struct dentry *debugfs_dir; | 63 | struct dentry *debugfs_dir; |
64 | 64 | ||
65 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, | 65 | static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, |
66 | unsigned long arg); | 66 | unsigned long arg); |
@@ -1191,6 +1191,11 @@ static long kvm_dev_ioctl(struct file *filp, | |||
1191 | r += PAGE_SIZE; /* pio data page */ | 1191 | r += PAGE_SIZE; /* pio data page */ |
1192 | #endif | 1192 | #endif |
1193 | break; | 1193 | break; |
1194 | case KVM_TRACE_ENABLE: | ||
1195 | case KVM_TRACE_PAUSE: | ||
1196 | case KVM_TRACE_DISABLE: | ||
1197 | r = kvm_trace_ioctl(ioctl, arg); | ||
1198 | break; | ||
1194 | default: | 1199 | default: |
1195 | return kvm_arch_dev_ioctl(filp, ioctl, arg); | 1200 | return kvm_arch_dev_ioctl(filp, ioctl, arg); |
1196 | } | 1201 | } |
@@ -1519,6 +1524,7 @@ EXPORT_SYMBOL_GPL(kvm_init); | |||
1519 | 1524 | ||
1520 | void kvm_exit(void) | 1525 | void kvm_exit(void) |
1521 | { | 1526 | { |
1527 | kvm_trace_cleanup(); | ||
1522 | misc_deregister(&kvm_dev); | 1528 | misc_deregister(&kvm_dev); |
1523 | kmem_cache_destroy(kvm_vcpu_cache); | 1529 | kmem_cache_destroy(kvm_vcpu_cache); |
1524 | sysdev_unregister(&kvm_sysdev); | 1530 | sysdev_unregister(&kvm_sysdev); |
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c new file mode 100644 index 000000000000..5425440c54bf --- /dev/null +++ b/virt/kvm/kvm_trace.c | |||
@@ -0,0 +1,276 @@ | |||
1 | /* | ||
2 | * kvm trace | ||
3 | * | ||
4 | * It is designed to allow debugging traces of kvm to be generated | ||
5 | * on UP / SMP machines. Each trace entry can be timestamped so that | ||
6 | * it's possible to reconstruct a chronological record of trace events. | ||
7 | * The implementation refers to blktrace kernel support. | ||
8 | * | ||
9 | * Copyright (c) 2008 Intel Corporation | ||
10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> | ||
11 | * | ||
12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com | ||
13 | * | ||
14 | * Date: Feb 2008 | ||
15 | */ | ||
16 | |||
17 | #include <linux/module.h> | ||
18 | #include <linux/relay.h> | ||
19 | #include <linux/debugfs.h> | ||
20 | |||
21 | #include <linux/kvm_host.h> | ||
22 | |||
23 | #define KVM_TRACE_STATE_RUNNING (1 << 0) | ||
24 | #define KVM_TRACE_STATE_PAUSE (1 << 1) | ||
25 | #define KVM_TRACE_STATE_CLEARUP (1 << 2) | ||
26 | |||
27 | struct kvm_trace { | ||
28 | int trace_state; | ||
29 | struct rchan *rchan; | ||
30 | struct dentry *lost_file; | ||
31 | atomic_t lost_records; | ||
32 | }; | ||
33 | static struct kvm_trace *kvm_trace; | ||
34 | |||
35 | struct kvm_trace_probe { | ||
36 | const char *name; | ||
37 | const char *format; | ||
38 | u32 cycle_in; | ||
39 | marker_probe_func *probe_func; | ||
40 | }; | ||
41 | |||
42 | static inline int calc_rec_size(int cycle, int extra) | ||
43 | { | ||
44 | int rec_size = KVM_TRC_HEAD_SIZE; | ||
45 | |||
46 | rec_size += extra; | ||
47 | return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; | ||
48 | } | ||
49 | |||
50 | static void kvm_add_trace(void *probe_private, void *call_data, | ||
51 | const char *format, va_list *args) | ||
52 | { | ||
53 | struct kvm_trace_probe *p = probe_private; | ||
54 | struct kvm_trace *kt = kvm_trace; | ||
55 | struct kvm_trace_rec rec; | ||
56 | struct kvm_vcpu *vcpu; | ||
57 | int i, extra, size; | ||
58 | |||
59 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) | ||
60 | return; | ||
61 | |||
62 | rec.event = va_arg(*args, u32); | ||
63 | vcpu = va_arg(*args, struct kvm_vcpu *); | ||
64 | rec.pid = current->tgid; | ||
65 | rec.vcpu_id = vcpu->vcpu_id; | ||
66 | |||
67 | extra = va_arg(*args, u32); | ||
68 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); | ||
69 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); | ||
70 | rec.extra_u32 = extra; | ||
71 | |||
72 | rec.cycle_in = p->cycle_in; | ||
73 | |||
74 | if (rec.cycle_in) { | ||
75 | u64 cycle = 0; | ||
76 | |||
77 | cycle = get_cycles(); | ||
78 | rec.u.cycle.cycle_lo = (u32)cycle; | ||
79 | rec.u.cycle.cycle_hi = (u32)(cycle >> 32); | ||
80 | |||
81 | for (i = 0; i < rec.extra_u32; i++) | ||
82 | rec.u.cycle.extra_u32[i] = va_arg(*args, u32); | ||
83 | } else { | ||
84 | for (i = 0; i < rec.extra_u32; i++) | ||
85 | rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); | ||
86 | } | ||
87 | |||
88 | size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); | ||
89 | relay_write(kt->rchan, &rec, size); | ||
90 | } | ||
91 | |||
92 | static struct kvm_trace_probe kvm_trace_probes[] = { | ||
93 | { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, | ||
94 | { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, | ||
95 | }; | ||
96 | |||
97 | static int lost_records_get(void *data, u64 *val) | ||
98 | { | ||
99 | struct kvm_trace *kt = data; | ||
100 | |||
101 | *val = atomic_read(&kt->lost_records); | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); | ||
106 | |||
107 | /* | ||
108 | * The relay channel is used in "no-overwrite" mode; it keeps track of how | ||
109 | * many times we encountered a full subbuffer, so that the user space app | ||
110 | * can be told how many records were lost. | ||
111 | */ | ||
112 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, | ||
113 | void *prev_subbuf, size_t prev_padding) | ||
114 | { | ||
115 | struct kvm_trace *kt; | ||
116 | |||
117 | if (!relay_buf_full(buf)) | ||
118 | return 1; | ||
119 | |||
120 | kt = buf->chan->private_data; | ||
121 | atomic_inc(&kt->lost_records); | ||
122 | |||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | static struct dentry *kvm_create_buf_file_callack(const char *filename, | ||
127 | struct dentry *parent, | ||
128 | int mode, | ||
129 | struct rchan_buf *buf, | ||
130 | int *is_global) | ||
131 | { | ||
132 | return debugfs_create_file(filename, mode, parent, buf, | ||
133 | &relay_file_operations); | ||
134 | } | ||
135 | |||
136 | static int kvm_remove_buf_file_callback(struct dentry *dentry) | ||
137 | { | ||
138 | debugfs_remove(dentry); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static struct rchan_callbacks kvm_relay_callbacks = { | ||
143 | .subbuf_start = kvm_subbuf_start_callback, | ||
144 | .create_buf_file = kvm_create_buf_file_callack, | ||
145 | .remove_buf_file = kvm_remove_buf_file_callback, | ||
146 | }; | ||
147 | |||
148 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) | ||
149 | { | ||
150 | struct kvm_trace *kt; | ||
151 | int i, r = -ENOMEM; | ||
152 | |||
153 | if (!kuts->buf_size || !kuts->buf_nr) | ||
154 | return -EINVAL; | ||
155 | |||
156 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); | ||
157 | if (!kt) | ||
158 | goto err; | ||
159 | |||
160 | r = -EIO; | ||
161 | atomic_set(&kt->lost_records, 0); | ||
162 | kt->lost_file = debugfs_create_file("lost_records", 0444, debugfs_dir, | ||
163 | kt, &kvm_trace_lost_ops); | ||
164 | if (!kt->lost_file) | ||
165 | goto err; | ||
166 | |||
167 | kt->rchan = relay_open("trace", debugfs_dir, kuts->buf_size, | ||
168 | kuts->buf_nr, &kvm_relay_callbacks, kt); | ||
169 | if (!kt->rchan) | ||
170 | goto err; | ||
171 | |||
172 | kvm_trace = kt; | ||
173 | |||
174 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
175 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
176 | |||
177 | r = marker_probe_register(p->name, p->format, p->probe_func, p); | ||
178 | if (r) | ||
179 | printk(KERN_INFO "Unable to register probe %s\n", | ||
180 | p->name); | ||
181 | } | ||
182 | |||
183 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; | ||
184 | |||
185 | return 0; | ||
186 | err: | ||
187 | if (kt) { | ||
188 | if (kt->lost_file) | ||
189 | debugfs_remove(kt->lost_file); | ||
190 | if (kt->rchan) | ||
191 | relay_close(kt->rchan); | ||
192 | kfree(kt); | ||
193 | } | ||
194 | return r; | ||
195 | } | ||
196 | |||
197 | static int kvm_trace_enable(char __user *arg) | ||
198 | { | ||
199 | struct kvm_user_trace_setup kuts; | ||
200 | int ret; | ||
201 | |||
202 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); | ||
203 | if (ret) | ||
204 | return -EFAULT; | ||
205 | |||
206 | ret = do_kvm_trace_enable(&kuts); | ||
207 | if (ret) | ||
208 | return ret; | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | static int kvm_trace_pause(void) | ||
214 | { | ||
215 | struct kvm_trace *kt = kvm_trace; | ||
216 | int r = -EINVAL; | ||
217 | |||
218 | if (kt == NULL) | ||
219 | return r; | ||
220 | |||
221 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { | ||
222 | kt->trace_state = KVM_TRACE_STATE_PAUSE; | ||
223 | relay_flush(kt->rchan); | ||
224 | r = 0; | ||
225 | } | ||
226 | |||
227 | return r; | ||
228 | } | ||
229 | |||
230 | void kvm_trace_cleanup(void) | ||
231 | { | ||
232 | struct kvm_trace *kt = kvm_trace; | ||
233 | int i; | ||
234 | |||
235 | if (kt == NULL) | ||
236 | return; | ||
237 | |||
238 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || | ||
239 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { | ||
240 | |||
241 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; | ||
242 | |||
243 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { | ||
244 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; | ||
245 | marker_probe_unregister(p->name, p->probe_func, p); | ||
246 | } | ||
247 | |||
248 | relay_close(kt->rchan); | ||
249 | debugfs_remove(kt->lost_file); | ||
250 | kfree(kt); | ||
251 | } | ||
252 | } | ||
253 | |||
254 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) | ||
255 | { | ||
256 | void __user *argp = (void __user *)arg; | ||
257 | long r = -EINVAL; | ||
258 | |||
259 | if (!capable(CAP_SYS_ADMIN)) | ||
260 | return -EPERM; | ||
261 | |||
262 | switch (ioctl) { | ||
263 | case KVM_TRACE_ENABLE: | ||
264 | r = kvm_trace_enable(argp); | ||
265 | break; | ||
266 | case KVM_TRACE_PAUSE: | ||
267 | r = kvm_trace_pause(); | ||
268 | break; | ||
269 | case KVM_TRACE_DISABLE: | ||
270 | r = 0; | ||
271 | kvm_trace_cleanup(); | ||
272 | break; | ||
273 | } | ||
274 | |||
275 | return r; | ||
276 | } | ||