aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kvm/Kconfig11
-rw-r--r--arch/x86/kvm/Makefile3
-rw-r--r--include/linux/kvm_host.h14
-rw-r--r--virt/kvm/kvm_main.c8
-rw-r--r--virt/kvm/kvm_trace.c276
5 files changed, 311 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 76c70ab44382..8d45fabc5f3b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -50,6 +50,17 @@ config KVM_AMD
50 Provides support for KVM on AMD processors equipped with the AMD-V 50 Provides support for KVM on AMD processors equipped with the AMD-V
51 (SVM) extensions. 51 (SVM) extensions.
52 52
53config KVM_TRACE
54 bool "KVM trace support"
55 depends on KVM && MARKERS && SYSFS
56 select RELAY
57 select DEBUG_FS
58 default n
59 ---help---
60 This option allows reading a trace of kvm-related events through
61 relayfs. Note the ABI is not considered stable and will be
62 modified in future updates.
63
53# OK, it's a little counter-intuitive to do this, but it puts it neatly under 64# OK, it's a little counter-intuitive to do this, but it puts it neatly under
54# the virtualization menu. 65# the virtualization menu.
55source drivers/lguest/Kconfig 66source drivers/lguest/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4d0c22e11f1a..c97d35c218db 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,6 +3,9 @@
3# 3#
4 4
5common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) 5common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
6ifeq ($(CONFIG_KVM_TRACE),y)
7common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
8endif
6 9
7EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm 10EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
8 11
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 578c3638bbba..bd0c2d2d840f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -15,6 +15,7 @@
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/preempt.h> 17#include <linux/preempt.h>
18#include <linux/marker.h>
18#include <asm/signal.h> 19#include <asm/signal.h>
19 20
20#include <linux/kvm.h> 21#include <linux/kvm.h>
@@ -309,5 +310,18 @@ struct kvm_stats_debugfs_item {
309 struct dentry *dentry; 310 struct dentry *dentry;
310}; 311};
311extern struct kvm_stats_debugfs_item debugfs_entries[]; 312extern struct kvm_stats_debugfs_item debugfs_entries[];
313extern struct dentry *debugfs_dir;
314
315#ifdef CONFIG_KVM_TRACE
316int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg);
317void kvm_trace_cleanup(void);
318#else
319static inline
320int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
321{
322 return -EINVAL;
323}
324#define kvm_trace_cleanup() ((void)0)
325#endif
312 326
313#endif 327#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6a52c084e068..d5911d9895c3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -60,7 +60,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
60 60
61static __read_mostly struct preempt_ops kvm_preempt_ops; 61static __read_mostly struct preempt_ops kvm_preempt_ops;
62 62
63static struct dentry *debugfs_dir; 63struct dentry *debugfs_dir;
64 64
65static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, 65static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
66 unsigned long arg); 66 unsigned long arg);
@@ -1191,6 +1191,11 @@ static long kvm_dev_ioctl(struct file *filp,
1191 r += PAGE_SIZE; /* pio data page */ 1191 r += PAGE_SIZE; /* pio data page */
1192#endif 1192#endif
1193 break; 1193 break;
1194 case KVM_TRACE_ENABLE:
1195 case KVM_TRACE_PAUSE:
1196 case KVM_TRACE_DISABLE:
1197 r = kvm_trace_ioctl(ioctl, arg);
1198 break;
1194 default: 1199 default:
1195 return kvm_arch_dev_ioctl(filp, ioctl, arg); 1200 return kvm_arch_dev_ioctl(filp, ioctl, arg);
1196 } 1201 }
@@ -1519,6 +1524,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
1519 1524
1520void kvm_exit(void) 1525void kvm_exit(void)
1521{ 1526{
1527 kvm_trace_cleanup();
1522 misc_deregister(&kvm_dev); 1528 misc_deregister(&kvm_dev);
1523 kmem_cache_destroy(kvm_vcpu_cache); 1529 kmem_cache_destroy(kvm_vcpu_cache);
1524 sysdev_unregister(&kvm_sysdev); 1530 sysdev_unregister(&kvm_sysdev);
diff --git a/virt/kvm/kvm_trace.c b/virt/kvm/kvm_trace.c
new file mode 100644
index 000000000000..5425440c54bf
--- /dev/null
+++ b/virt/kvm/kvm_trace.c
@@ -0,0 +1,276 @@
1/*
2 * kvm trace
3 *
4 * It is designed to allow debugging traces of kvm to be generated
5 * on UP / SMP machines. Each trace entry can be timestamped so that
6 * it's possible to reconstruct a chronological record of trace events.
7 * The implementation refers to blktrace kernel support.
8 *
9 * Copyright (c) 2008 Intel Corporation
10 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
11 *
12 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
13 *
14 * Date: Feb 2008
15 */
16
17#include <linux/module.h>
18#include <linux/relay.h>
19#include <linux/debugfs.h>
20
21#include <linux/kvm_host.h>
22
23#define KVM_TRACE_STATE_RUNNING (1 << 0)
24#define KVM_TRACE_STATE_PAUSE (1 << 1)
25#define KVM_TRACE_STATE_CLEARUP (1 << 2)
26
27struct kvm_trace {
28 int trace_state;
29 struct rchan *rchan;
30 struct dentry *lost_file;
31 atomic_t lost_records;
32};
33static struct kvm_trace *kvm_trace;
34
35struct kvm_trace_probe {
36 const char *name;
37 const char *format;
38 u32 cycle_in;
39 marker_probe_func *probe_func;
40};
41
42static inline int calc_rec_size(int cycle, int extra)
43{
44 int rec_size = KVM_TRC_HEAD_SIZE;
45
46 rec_size += extra;
47 return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
48}
49
50static void kvm_add_trace(void *probe_private, void *call_data,
51 const char *format, va_list *args)
52{
53 struct kvm_trace_probe *p = probe_private;
54 struct kvm_trace *kt = kvm_trace;
55 struct kvm_trace_rec rec;
56 struct kvm_vcpu *vcpu;
57 int i, extra, size;
58
59 if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
60 return;
61
62 rec.event = va_arg(*args, u32);
63 vcpu = va_arg(*args, struct kvm_vcpu *);
64 rec.pid = current->tgid;
65 rec.vcpu_id = vcpu->vcpu_id;
66
67 extra = va_arg(*args, u32);
68 WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
69 extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
70 rec.extra_u32 = extra;
71
72 rec.cycle_in = p->cycle_in;
73
74 if (rec.cycle_in) {
75 u64 cycle = 0;
76
77 cycle = get_cycles();
78 rec.u.cycle.cycle_lo = (u32)cycle;
79 rec.u.cycle.cycle_hi = (u32)(cycle >> 32);
80
81 for (i = 0; i < rec.extra_u32; i++)
82 rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
83 } else {
84 for (i = 0; i < rec.extra_u32; i++)
85 rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
86 }
87
88 size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
89 relay_write(kt->rchan, &rec, size);
90}
91
92static struct kvm_trace_probe kvm_trace_probes[] = {
93 { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
94 { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
95};
96
97static int lost_records_get(void *data, u64 *val)
98{
99 struct kvm_trace *kt = data;
100
101 *val = atomic_read(&kt->lost_records);
102 return 0;
103}
104
105DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
106
107/*
108 * The relay channel is used in "no-overwrite" mode, it keeps trace of how
109 * many times we encountered a full subbuffer, to tell user space app the
110 * lost records there were.
111 */
112static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
113 void *prev_subbuf, size_t prev_padding)
114{
115 struct kvm_trace *kt;
116
117 if (!relay_buf_full(buf))
118 return 1;
119
120 kt = buf->chan->private_data;
121 atomic_inc(&kt->lost_records);
122
123 return 0;
124}
125
126static struct dentry *kvm_create_buf_file_callack(const char *filename,
127 struct dentry *parent,
128 int mode,
129 struct rchan_buf *buf,
130 int *is_global)
131{
132 return debugfs_create_file(filename, mode, parent, buf,
133 &relay_file_operations);
134}
135
/* relay hook: drop the debugfs file backing a relay buffer. */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);
	return 0;
}
141
142static struct rchan_callbacks kvm_relay_callbacks = {
143 .subbuf_start = kvm_subbuf_start_callback,
144 .create_buf_file = kvm_create_buf_file_callack,
145 .remove_buf_file = kvm_remove_buf_file_callback,
146};
147
148static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
149{
150 struct kvm_trace *kt;
151 int i, r = -ENOMEM;
152
153 if (!kuts->buf_size || !kuts->buf_nr)
154 return -EINVAL;
155
156 kt = kzalloc(sizeof(*kt), GFP_KERNEL);
157 if (!kt)
158 goto err;
159
160 r = -EIO;
161 atomic_set(&kt->lost_records, 0);
162 kt->lost_file = debugfs_create_file("lost_records", 0444, debugfs_dir,
163 kt, &kvm_trace_lost_ops);
164 if (!kt->lost_file)
165 goto err;
166
167 kt->rchan = relay_open("trace", debugfs_dir, kuts->buf_size,
168 kuts->buf_nr, &kvm_relay_callbacks, kt);
169 if (!kt->rchan)
170 goto err;
171
172 kvm_trace = kt;
173
174 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
175 struct kvm_trace_probe *p = &kvm_trace_probes[i];
176
177 r = marker_probe_register(p->name, p->format, p->probe_func, p);
178 if (r)
179 printk(KERN_INFO "Unable to register probe %s\n",
180 p->name);
181 }
182
183 kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
184
185 return 0;
186err:
187 if (kt) {
188 if (kt->lost_file)
189 debugfs_remove(kt->lost_file);
190 if (kt->rchan)
191 relay_close(kt->rchan);
192 kfree(kt);
193 }
194 return r;
195}
196
197static int kvm_trace_enable(char __user *arg)
198{
199 struct kvm_user_trace_setup kuts;
200 int ret;
201
202 ret = copy_from_user(&kuts, arg, sizeof(kuts));
203 if (ret)
204 return -EFAULT;
205
206 ret = do_kvm_trace_enable(&kuts);
207 if (ret)
208 return ret;
209
210 return 0;
211}
212
213static int kvm_trace_pause(void)
214{
215 struct kvm_trace *kt = kvm_trace;
216 int r = -EINVAL;
217
218 if (kt == NULL)
219 return r;
220
221 if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
222 kt->trace_state = KVM_TRACE_STATE_PAUSE;
223 relay_flush(kt->rchan);
224 r = 0;
225 }
226
227 return r;
228}
229
230void kvm_trace_cleanup(void)
231{
232 struct kvm_trace *kt = kvm_trace;
233 int i;
234
235 if (kt == NULL)
236 return;
237
238 if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
239 kt->trace_state == KVM_TRACE_STATE_PAUSE) {
240
241 kt->trace_state = KVM_TRACE_STATE_CLEARUP;
242
243 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
244 struct kvm_trace_probe *p = &kvm_trace_probes[i];
245 marker_probe_unregister(p->name, p->probe_func, p);
246 }
247
248 relay_close(kt->rchan);
249 debugfs_remove(kt->lost_file);
250 kfree(kt);
251 }
252}
253
254int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
255{
256 void __user *argp = (void __user *)arg;
257 long r = -EINVAL;
258
259 if (!capable(CAP_SYS_ADMIN))
260 return -EPERM;
261
262 switch (ioctl) {
263 case KVM_TRACE_ENABLE:
264 r = kvm_trace_enable(argp);
265 break;
266 case KVM_TRACE_PAUSE:
267 r = kvm_trace_pause();
268 break;
269 case KVM_TRACE_DISABLE:
270 r = 0;
271 kvm_trace_cleanup();
272 break;
273 }
274
275 return r;
276}