author    Linus Torvalds <torvalds@linux-foundation.org>  2012-10-01 13:28:49 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-10-01 13:28:49 -0400
commit    7e92daaefa68e5ef1e1732e45231e73adbb724e7 (patch)
tree      8e7f8ac9d82654df4c65939c6682f95510e22977 /tools/perf/builtin-kvm.c
parent    7a68294278ae714ce2632a54f0f46916dca64f56 (diff)
parent    1d787d37c8ff6612b8151c6dff15bfa7347bcbdf (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf update from Ingo Molnar:
 "Lots of changes in this cycle as well, with hundreds of commits from over 30 contributors.  Most of the activity was on the tooling side.

  Higher level changes:

   - New 'perf kvm' analysis tool, from Xiao Guangrong.
   - New 'perf trace' system-wide tracing tool
   - uprobes fixes + cleanups from Oleg Nesterov.
   - Lots of patches to make perf build on Android out of box, from Irina Tirdea
   - Extend ftrace function tracing utility to be more dynamic for its users.  It allows for data passing to the callback functions, as well as reading regs as if a breakpoint were to trigger at function entry.  The main goal of this patch series was to allow kprobes to use ftrace as an optimized probe point when a probe is placed on an ftrace nop.  With lots of help from Masami Hiramatsu, and going through lots of iterations, we finally came up with a good solution.
   - Add cpumask for uncore pmu, use it in 'stat', from Yan, Zheng.
   - Various tracing updates from Steve Rostedt
   - Clean up and improve 'perf sched' performance by eliminating lots of needless calls to libtraceevent.
   - Event group parsing support, from Jiri Olsa
   - UI/gtk refactorings and improvements from Namhyung Kim
   - Add support for non-tracepoint events in perf script python, from Feng Tang
   - Add --symbols to 'script', similar to the one in 'report', from Feng Tang.

  Infrastructure enhancements and fixes:

   - Convert the trace builtins to use the growing evsel/evlist tracepoint infrastructure, removing several open coded constructs like switch-like series of strcmp to dispatch events, etc.  Basically what had already been showcased in 'perf sched'.
   - Add evsel constructor for tracepoints, that uses libtraceevent just to parse the /format events file, use it in a new 'perf test' to make sure the libtraceevent format parsing regressions can be more readily caught.
   - Some strange errors were happening in some builds, but not on the next, reported by several people, problem was some parser related files, generated during the build, didn't have proper make deps, fix from Eric Sandeen.
   - Introduce struct and cache information about the environment where a perf.data file was captured, from Namhyung Kim.
   - Fix handling of unresolved samples when --symbols is used in 'report', from Feng Tang.
   - Add union member access support to 'probe', from Hyeoncheol Lee.
   - Fixups to die() removal, from Namhyung Kim.
   - Render fixes for the TUI, from Namhyung Kim.
   - Don't enable annotation in non symbolic view, from Namhyung Kim.
   - Fix pipe mode in 'report', from Namhyung Kim.
   - Move related stats code from stat to util/, will be used by the 'stat' kvm tool, from Xiao Guangrong.
   - Remove die()/exit() calls from several tools.
   - Resolve vdso callchains, from Jiri Olsa
   - Don't pass const char pointers to basename, so that we can unconditionally use libgen.h and thus avoid ifdef BIONIC lines, from David Ahern
   - Refactor hist formatting so that it can be reused with the GTK browser, from Namhyung Kim
   - Fix build for another rbtree.c change, from Adrian Hunter.
   - Make 'perf diff' command work with evsel hists, from Jiri Olsa.
   - Use the only field_sep var that is set up: symbol_conf.field_sep, fix from Jiri Olsa.
   - .gitignore compiled python binaries, from Namhyung Kim.
   - Get rid of die() in more libtraceevent places, from Namhyung Kim.
   - Rename libtraceevent 'private' struct member to 'priv' so that it works in C++, from Steven Rostedt
   - Remove lots of exit()/die() calls from tools so that the main perf exit routine can take place, from David Ahern
   - Fix x86 build on x86-64, from David Ahern.
   - {int,str,rb}list fixes from Suzuki K Poulose
   - perf.data header fixes from Namhyung Kim
   - Allow user to indicate objdump path, needed in cross environments, from Maciek Borzecki
   - Fix hardware cache event name generation, fix from Jiri Olsa
   - Add round trip test for sw, hw and cache event names, catching the problem Jiri fixed, after Jiri's patch, the test passes successfully.
   - Clean target should do clean for lib/traceevent too, fix from David Ahern
   - Check the right variable for allocation failure, fix from Namhyung Kim
   - Set up evsel->tp_format regardless of evsel->name being set already, fix from Namhyung Kim
   - Oprofile fixes from Robert Richter.
   - Remove perf_event_attr needless version inflation, from Jiri Olsa
   - Introduce libtraceevent strerror like error reporting facility, from Namhyung Kim
   - Add pmu mappings to perf.data header and use event names from cmd line, from Robert Richter
   - Fix include order for bison/flex-generated C files, from Ben Hutchings
   - Build fixes and documentation corrections from David Ahern
   - Assorted cleanups from Robert Richter
   - Let O= makes handle relative paths, from Steven Rostedt
   - perf script python fixes, from Feng Tang.
   - Initial bash completion support, from Frederic Weisbecker
   - Allow building without libelf, from Namhyung Kim.
   - Support DWARF CFI based unwind to have callchains when %bp based unwinding is not possible, from Jiri Olsa.
   - Symbol resolution fixes, while fixing support PPC64 files with an .opt ELF section was the end goal, several fixes for code that handles all architectures and cleanups are included, from Cody Schafer.
   - Assorted fixes for Documentation and build in 32 bit, from Robert Richter
   - Cache the libtraceevent event_format associated to each evsel early, so that we avoid relookups, i.e. calling pevent_find_event repeatedly when processing tracepoint events.  [ This is to reduce the surface contact with libtraceevents and make clear what it is that the perf tools need from that lib: so far parsing the common and per event fields. ]
   - Don't stop the build if the audit libraries are not installed, fix from Namhyung Kim.
   - Fix bfd.h/libbfd detection with recent binutils, from Markus Trippelsdorf.
   - Improve warning message when libunwind devel packages not present, from Jiri Olsa"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (282 commits)
  perf trace: Add aliases for some syscalls
  perf probe: Print an enum type variable in "enum variable-name" format when showing accessible variables
  perf tools: Check libaudit availability for perf-trace builtin
  perf hists: Add missing period_* fields when collapsing a hist entry
  perf trace: New tool
  perf evsel: Export the event_format constructor
  perf evsel: Introduce rawptr() method
  perf tools: Use perf_evsel__newtp in the event parser
  perf evsel: The tracepoint constructor should store sys:name
  perf evlist: Introduce set_filter() method
  perf evlist: Renane set_filters method to apply_filters
  perf test: Add test to check we correctly parse and match syscall open parms
  perf evsel: Handle endianity in intval method
  perf evsel: Know if byte swap is needed
  perf tools: Allow handling a NULL cpu_map as meaning "all cpus"
  perf evsel: Improve tracepoint constructor setup
  tools lib traceevent: Fix error path on pevent_parse_event
  perf test: Fix build failure
  trace: Move trace event enable from fs_initcall to core_initcall
  tracing: Add an option for disabling markers
  ...
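As background for the builtin-kvm.c diff below, the new 'perf kvm stat' sub-tool added here is driven roughly as follows. This is a sketch pieced together from the usage strings and option tables in the patch itself; the output file handling and the mmio/ioport examples are only illustrative, not a complete reference:

    perf kvm stat record                  # records the kvm:kvm_entry, kvm_exit, kvm_mmio and kvm_pio tracepoints
    perf kvm stat report                  # summarizes VM-EXIT events (the default, --event vmexit)
    perf kvm stat report --event mmio     # alternatives: mmio, ioport; --vcpu <id> restricts to one vcpu
    perf kvm stat report --key time       # sort by average time instead of sample count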
Diffstat (limited to 'tools/perf/builtin-kvm.c')
-rw-r--r--	tools/perf/builtin-kvm.c	838
1 files changed, 834 insertions, 4 deletions
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 9fc6e0fa3dce..a28c9cad9048 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1,6 +1,7 @@
 #include "builtin.h"
 #include "perf.h"
 
+#include "util/evsel.h"
 #include "util/util.h"
 #include "util/cache.h"
 #include "util/symbol.h"
@@ -10,8 +11,10 @@
 
 #include "util/parse-options.h"
 #include "util/trace-event.h"
-
 #include "util/debug.h"
+#include "util/debugfs.h"
+#include "util/tool.h"
+#include "util/stat.h"
 
 #include <sys/prctl.h>
 
@@ -19,11 +22,836 @@
 #include <pthread.h>
 #include <math.h>
 
-static const char *file_name;
+#include "../../arch/x86/include/asm/svm.h"
+#include "../../arch/x86/include/asm/vmx.h"
+#include "../../arch/x86/include/asm/kvm.h"
+
+struct event_key {
+	#define INVALID_KEY	(~0ULL)
+	u64 key;
+	int info;
+};
+
+struct kvm_events_ops {
+	bool (*is_begin_event)(struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct event_key *key);
+	bool (*is_end_event)(struct perf_evsel *evsel,
+			     struct perf_sample *sample, struct event_key *key);
+	void (*decode_key)(struct event_key *key, char decode[20]);
+	const char *name;
+};
+
+static void exit_event_get_key(struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct event_key *key)
+{
+	key->info = 0;
+	key->key = perf_evsel__intval(evsel, sample, "exit_reason");
+}
+
+static bool kvm_exit_event(struct perf_evsel *evsel)
+{
+	return !strcmp(evsel->name, "kvm:kvm_exit");
+}
+
+static bool exit_event_begin(struct perf_evsel *evsel,
+			     struct perf_sample *sample, struct event_key *key)
+{
+	if (kvm_exit_event(evsel)) {
+		exit_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+
+static bool kvm_entry_event(struct perf_evsel *evsel)
+{
+	return !strcmp(evsel->name, "kvm:kvm_entry");
+}
+
+static bool exit_event_end(struct perf_evsel *evsel,
+			   struct perf_sample *sample __maybe_unused,
+			   struct event_key *key __maybe_unused)
+{
+	return kvm_entry_event(evsel);
+}
+
+struct exit_reasons_table {
+	unsigned long exit_code;
+	const char *reason;
+};
+
+struct exit_reasons_table vmx_exit_reasons[] = {
+	VMX_EXIT_REASONS
+};
+
+struct exit_reasons_table svm_exit_reasons[] = {
+	SVM_EXIT_REASONS
+};
+
+static int cpu_isa;
+
+static const char *get_exit_reason(u64 exit_code)
+{
+	int table_size = ARRAY_SIZE(svm_exit_reasons);
+	struct exit_reasons_table *table = svm_exit_reasons;
+
+	if (cpu_isa == 1) {
+		table = vmx_exit_reasons;
+		table_size = ARRAY_SIZE(vmx_exit_reasons);
+	}
+
+	while (table_size--) {
+		if (table->exit_code == exit_code)
+			return table->reason;
+		table++;
+	}
+
+	pr_err("unknown kvm exit code:%lld on %s\n",
+		(unsigned long long)exit_code, cpu_isa ? "VMX" : "SVM");
+	return "UNKNOWN";
+}
+
+static void exit_event_decode_key(struct event_key *key, char decode[20])
+{
+	const char *exit_reason = get_exit_reason(key->key);
+
+	scnprintf(decode, 20, "%s", exit_reason);
+}
+
+static struct kvm_events_ops exit_events = {
+	.is_begin_event = exit_event_begin,
+	.is_end_event = exit_event_end,
+	.decode_key = exit_event_decode_key,
+	.name = "VM-EXIT"
+};
+
+ /*
+  * For the mmio events, we treat:
+  * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry
+  * the time of MMIO read: kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...).
+  */
+static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
+			       struct event_key *key)
+{
+	key->key = perf_evsel__intval(evsel, sample, "gpa");
+	key->info = perf_evsel__intval(evsel, sample, "type");
+}
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+static bool mmio_event_begin(struct perf_evsel *evsel,
+			     struct perf_sample *sample, struct event_key *key)
+{
+	/* MMIO read begin event in kernel. */
+	if (kvm_exit_event(evsel))
+		return true;
+
+	/* MMIO write begin event in kernel. */
+	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
+	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
+		mmio_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+
+static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample,
+			   struct event_key *key)
+{
+	/* MMIO write end event in kernel. */
+	if (kvm_entry_event(evsel))
+		return true;
+
+	/* MMIO read end event in kernel.*/
+	if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
+	    perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
+		mmio_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+
+static void mmio_event_decode_key(struct event_key *key, char decode[20])
+{
+	scnprintf(decode, 20, "%#lx:%s", (unsigned long)key->key,
+		  key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
+}
+
+static struct kvm_events_ops mmio_events = {
+	.is_begin_event = mmio_event_begin,
+	.is_end_event = mmio_event_end,
+	.decode_key = mmio_event_decode_key,
+	.name = "MMIO Access"
+};
+
+ /* The time of emulation pio access is from kvm_pio to kvm_entry. */
+static void ioport_event_get_key(struct perf_evsel *evsel,
+				 struct perf_sample *sample,
+				 struct event_key *key)
+{
+	key->key = perf_evsel__intval(evsel, sample, "port");
+	key->info = perf_evsel__intval(evsel, sample, "rw");
+}
+
+static bool ioport_event_begin(struct perf_evsel *evsel,
+			       struct perf_sample *sample,
+			       struct event_key *key)
+{
+	if (!strcmp(evsel->name, "kvm:kvm_pio")) {
+		ioport_event_get_key(evsel, sample, key);
+		return true;
+	}
+
+	return false;
+}
+
+static bool ioport_event_end(struct perf_evsel *evsel,
+			     struct perf_sample *sample __maybe_unused,
+			     struct event_key *key __maybe_unused)
+{
+	return kvm_entry_event(evsel);
+}
+
+static void ioport_event_decode_key(struct event_key *key, char decode[20])
+{
+	scnprintf(decode, 20, "%#llx:%s", (unsigned long long)key->key,
+		  key->info ? "POUT" : "PIN");
+}
+
+static struct kvm_events_ops ioport_events = {
+	.is_begin_event = ioport_event_begin,
+	.is_end_event = ioport_event_end,
+	.decode_key = ioport_event_decode_key,
+	.name = "IO Port Access"
+};
+
+static const char *report_event = "vmexit";
+struct kvm_events_ops *events_ops;
+
+static bool register_kvm_events_ops(void)
+{
+	bool ret = true;
+
+	if (!strcmp(report_event, "vmexit"))
+		events_ops = &exit_events;
+	else if (!strcmp(report_event, "mmio"))
+		events_ops = &mmio_events;
+	else if (!strcmp(report_event, "ioport"))
+		events_ops = &ioport_events;
+	else {
+		pr_err("Unknown report event:%s\n", report_event);
+		ret = false;
+	}
+
+	return ret;
+}
+
+struct kvm_event_stats {
+	u64 time;
+	struct stats stats;
+};
+
+struct kvm_event {
+	struct list_head hash_entry;
+	struct rb_node rb;
+
+	struct event_key key;
+
+	struct kvm_event_stats total;
+
+	#define DEFAULT_VCPU_NUM 8
+	int max_vcpu;
+	struct kvm_event_stats *vcpu;
+};
+
+struct vcpu_event_record {
+	int vcpu_id;
+	u64 start_time;
+	struct kvm_event *last_event;
+};
+
+#define EVENTS_BITS 12
+#define EVENTS_CACHE_SIZE (1UL << EVENTS_BITS)
+
+static u64 total_time;
+static u64 total_count;
+static struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
+
+static void init_kvm_event_record(void)
+{
+	int i;
+
+	for (i = 0; i < (int)EVENTS_CACHE_SIZE; i++)
+		INIT_LIST_HEAD(&kvm_events_cache[i]);
+}
+
+static int kvm_events_hash_fn(u64 key)
+{
+	return key & (EVENTS_CACHE_SIZE - 1);
+}
+
+static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
+{
+	int old_max_vcpu = event->max_vcpu;
+
+	if (vcpu_id < event->max_vcpu)
+		return true;
+
+	while (event->max_vcpu <= vcpu_id)
+		event->max_vcpu += DEFAULT_VCPU_NUM;
+
+	event->vcpu = realloc(event->vcpu,
+			      event->max_vcpu * sizeof(*event->vcpu));
+	if (!event->vcpu) {
+		pr_err("Not enough memory\n");
+		return false;
+	}
+
+	memset(event->vcpu + old_max_vcpu, 0,
+	       (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu));
+	return true;
+}
+
+static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
+{
+	struct kvm_event *event;
+
+	event = zalloc(sizeof(*event));
+	if (!event) {
+		pr_err("Not enough memory\n");
+		return NULL;
+	}
+
+	event->key = *key;
+	return event;
+}
+
+static struct kvm_event *find_create_kvm_event(struct event_key *key)
+{
+	struct kvm_event *event;
+	struct list_head *head;
+
+	BUG_ON(key->key == INVALID_KEY);
+
+	head = &kvm_events_cache[kvm_events_hash_fn(key->key)];
+	list_for_each_entry(event, head, hash_entry)
+		if (event->key.key == key->key && event->key.info == key->info)
+			return event;
+
+	event = kvm_alloc_init_event(key);
+	if (!event)
+		return NULL;
+
+	list_add(&event->hash_entry, head);
+	return event;
+}
+
+static bool handle_begin_event(struct vcpu_event_record *vcpu_record,
+			       struct event_key *key, u64 timestamp)
+{
+	struct kvm_event *event = NULL;
+
+	if (key->key != INVALID_KEY)
+		event = find_create_kvm_event(key);
+
+	vcpu_record->last_event = event;
+	vcpu_record->start_time = timestamp;
+	return true;
+}
+
+static void
+kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff)
+{
+	kvm_stats->time += time_diff;
+	update_stats(&kvm_stats->stats, time_diff);
+}
+
+static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
+{
+	struct kvm_event_stats *kvm_stats = &event->total;
+
+	if (vcpu_id != -1)
+		kvm_stats = &event->vcpu[vcpu_id];
+
+	return rel_stddev_stats(stddev_stats(&kvm_stats->stats),
+				avg_stats(&kvm_stats->stats));
+}
+
+static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
+			     u64 time_diff)
+{
+	kvm_update_event_stats(&event->total, time_diff);
+
+	if (!kvm_event_expand(event, vcpu_id))
+		return false;
+
+	kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff);
+	return true;
+}
+
+static bool handle_end_event(struct vcpu_event_record *vcpu_record,
+			     struct event_key *key, u64 timestamp)
+{
+	struct kvm_event *event;
+	u64 time_begin, time_diff;
+
+	event = vcpu_record->last_event;
+	time_begin = vcpu_record->start_time;
+
+	/* The begin event is not caught. */
+	if (!time_begin)
+		return true;
+
+	/*
+	 * In some case, the 'begin event' only records the start timestamp,
+	 * the actual event is recognized in the 'end event' (e.g. mmio-event).
+	 */
+
+	/* Both begin and end events did not get the key. */
+	if (!event && key->key == INVALID_KEY)
+		return true;
+
+	if (!event)
+		event = find_create_kvm_event(key);
+
+	if (!event)
+		return false;
+
+	vcpu_record->last_event = NULL;
+	vcpu_record->start_time = 0;
+
+	BUG_ON(timestamp < time_begin);
+
+	time_diff = timestamp - time_begin;
+	return update_kvm_event(event, vcpu_record->vcpu_id, time_diff);
+}
+
+static
+struct vcpu_event_record *per_vcpu_record(struct thread *thread,
+					  struct perf_evsel *evsel,
+					  struct perf_sample *sample)
+{
+	/* Only kvm_entry records vcpu id. */
+	if (!thread->priv && kvm_entry_event(evsel)) {
+		struct vcpu_event_record *vcpu_record;
+
+		vcpu_record = zalloc(sizeof(*vcpu_record));
+		if (!vcpu_record) {
+			pr_err("%s: Not enough memory\n", __func__);
+			return NULL;
+		}
+
+		vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, "vcpu_id");
+		thread->priv = vcpu_record;
+	}
+
+	return thread->priv;
+}
+
+static bool handle_kvm_event(struct thread *thread, struct perf_evsel *evsel,
+			     struct perf_sample *sample)
+{
+	struct vcpu_event_record *vcpu_record;
+	struct event_key key = {.key = INVALID_KEY};
+
+	vcpu_record = per_vcpu_record(thread, evsel, sample);
+	if (!vcpu_record)
+		return true;
+
+	if (events_ops->is_begin_event(evsel, sample, &key))
+		return handle_begin_event(vcpu_record, &key, sample->time);
+
+	if (events_ops->is_end_event(evsel, sample, &key))
+		return handle_end_event(vcpu_record, &key, sample->time);
+
+	return true;
+}
+typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);
+struct kvm_event_key {
+	const char *name;
+	key_cmp_fun key;
+};
+
+static int trace_vcpu = -1;
+#define GET_EVENT_KEY(func, field)					\
+static u64 get_event_ ##func(struct kvm_event *event, int vcpu)	\
+{									\
+	if (vcpu == -1)							\
+		return event->total.field;				\
+									\
+	if (vcpu >= event->max_vcpu)					\
+		return 0;						\
+									\
+	return event->vcpu[vcpu].field;					\
+}
+
+#define COMPARE_EVENT_KEY(func, field)					\
+GET_EVENT_KEY(func, field)						\
+static int compare_kvm_event_ ## func(struct kvm_event *one,		\
+					struct kvm_event *two, int vcpu)\
+{									\
+	return get_event_ ##func(one, vcpu) >				\
+	       get_event_ ##func(two, vcpu);				\
+}
+
+GET_EVENT_KEY(time, time);
+COMPARE_EVENT_KEY(count, stats.n);
+COMPARE_EVENT_KEY(mean, stats.mean);
+
+#define DEF_SORT_NAME_KEY(name, compare_key)				\
+	{ #name, compare_kvm_event_ ## compare_key }
+
+static struct kvm_event_key keys[] = {
+	DEF_SORT_NAME_KEY(sample, count),
+	DEF_SORT_NAME_KEY(time, mean),
+	{ NULL, NULL }
+};
+
+static const char *sort_key = "sample";
+static key_cmp_fun compare;
+
+static bool select_key(void)
+{
+	int i;
+
+	for (i = 0; keys[i].name; i++) {
+		if (!strcmp(keys[i].name, sort_key)) {
+			compare = keys[i].key;
+			return true;
+		}
+	}
+
+	pr_err("Unknown compare key:%s\n", sort_key);
+	return false;
+}
+
+static struct rb_root result;
+static void insert_to_result(struct kvm_event *event, key_cmp_fun bigger,
+			     int vcpu)
+{
+	struct rb_node **rb = &result.rb_node;
+	struct rb_node *parent = NULL;
+	struct kvm_event *p;
+
+	while (*rb) {
+		p = container_of(*rb, struct kvm_event, rb);
+		parent = *rb;
+
+		if (bigger(event, p, vcpu))
+			rb = &(*rb)->rb_left;
+		else
+			rb = &(*rb)->rb_right;
+	}
+
+	rb_link_node(&event->rb, parent, rb);
+	rb_insert_color(&event->rb, &result);
+}
+
+static void update_total_count(struct kvm_event *event, int vcpu)
+{
+	total_count += get_event_count(event, vcpu);
+	total_time += get_event_time(event, vcpu);
+}
+
+static bool event_is_valid(struct kvm_event *event, int vcpu)
+{
+	return !!get_event_count(event, vcpu);
+}
+
+static void sort_result(int vcpu)
+{
+	unsigned int i;
+	struct kvm_event *event;
+
+	for (i = 0; i < EVENTS_CACHE_SIZE; i++)
+		list_for_each_entry(event, &kvm_events_cache[i], hash_entry)
+			if (event_is_valid(event, vcpu)) {
+				update_total_count(event, vcpu);
+				insert_to_result(event, compare, vcpu);
+			}
+}
+
+/* returns left most element of result, and erase it */
+static struct kvm_event *pop_from_result(void)
+{
+	struct rb_node *node = rb_first(&result);
+
+	if (!node)
+		return NULL;
+
+	rb_erase(node, &result);
+	return container_of(node, struct kvm_event, rb);
+}
+
+static void print_vcpu_info(int vcpu)
+{
+	pr_info("Analyze events for ");
+
+	if (vcpu == -1)
+		pr_info("all VCPUs:\n\n");
+	else
+		pr_info("VCPU %d:\n\n", vcpu);
+}
+
+static void print_result(int vcpu)
+{
+	char decode[20];
+	struct kvm_event *event;
+
+	pr_info("\n\n");
+	print_vcpu_info(vcpu);
+	pr_info("%20s ", events_ops->name);
+	pr_info("%10s ", "Samples");
+	pr_info("%9s ", "Samples%");
+
+	pr_info("%9s ", "Time%");
+	pr_info("%16s ", "Avg time");
+	pr_info("\n\n");
+
+	while ((event = pop_from_result())) {
+		u64 ecount, etime;
+
+		ecount = get_event_count(event, vcpu);
+		etime = get_event_time(event, vcpu);
+
+		events_ops->decode_key(&event->key, decode);
+		pr_info("%20s ", decode);
+		pr_info("%10llu ", (unsigned long long)ecount);
+		pr_info("%8.2f%% ", (double)ecount / total_count * 100);
+		pr_info("%8.2f%% ", (double)etime / total_time * 100);
+		pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3,
+			kvm_event_rel_stddev(vcpu, event));
+		pr_info("\n");
+	}
+
+	pr_info("\nTotal Samples:%lld, Total events handled time:%.2fus.\n\n",
+		(unsigned long long)total_count, total_time / 1e3);
+}
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel,
+				struct machine *machine)
+{
+	struct thread *thread = machine__findnew_thread(machine, sample->tid);
+
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	if (!handle_kvm_event(thread, evsel, sample))
+		return -1;
+
+	return 0;
+}
+
+static struct perf_tool eops = {
+	.sample			= process_sample_event,
+	.comm			= perf_event__process_comm,
+	.ordered_samples	= true,
+};
+
+static int get_cpu_isa(struct perf_session *session)
+{
+	char *cpuid = session->header.env.cpuid;
+	int isa;
+
+	if (strstr(cpuid, "Intel"))
+		isa = 1;
+	else if (strstr(cpuid, "AMD"))
+		isa = 0;
+	else {
+		pr_err("CPU %s is not supported.\n", cpuid);
+		isa = -ENOTSUP;
+	}
+
+	return isa;
+}
+
+static const char *file_name;
+
+static int read_events(void)
+{
+	struct perf_session *kvm_session;
+	int ret;
+
+	kvm_session = perf_session__new(file_name, O_RDONLY, 0, false, &eops);
+	if (!kvm_session) {
+		pr_err("Initializing perf session failed\n");
+		return -EINVAL;
+	}
+
+	if (!perf_session__has_traces(kvm_session, "kvm record"))
+		return -EINVAL;
+
+	/*
+	 * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
+	 * traced in the old kernel.
+	 */
+	ret = get_cpu_isa(kvm_session);
+
+	if (ret < 0)
+		return ret;
+
+	cpu_isa = ret;
+
+	return perf_session__process_events(kvm_session, &eops);
+}
+
+static bool verify_vcpu(int vcpu)
+{
+	if (vcpu != -1 && vcpu < 0) {
+		pr_err("Invalid vcpu:%d.\n", vcpu);
+		return false;
+	}
+
+	return true;
+}
+
+static int kvm_events_report_vcpu(int vcpu)
+{
+	int ret = -EINVAL;
+
+	if (!verify_vcpu(vcpu))
+		goto exit;
+
+	if (!select_key())
+		goto exit;
+
+	if (!register_kvm_events_ops())
+		goto exit;
+
+	init_kvm_event_record();
+	setup_pager();
+
+	ret = read_events();
+	if (ret)
+		goto exit;
+
+	sort_result(vcpu);
+	print_result(vcpu);
+exit:
+	return ret;
+}
+
+static const char * const record_args[] = {
+	"record",
+	"-R",
+	"-f",
+	"-m", "1024",
+	"-c", "1",
+	"-e", "kvm:kvm_entry",
+	"-e", "kvm:kvm_exit",
+	"-e", "kvm:kvm_mmio",
+	"-e", "kvm:kvm_pio",
+};
+
+#define STRDUP_FAIL_EXIT(s)		\
+	({	char *_p;		\
+	_p = strdup(s);			\
+		if (!_p)		\
+			return -ENOMEM;	\
+		_p;			\
+	})
+
+static int kvm_events_record(int argc, const char **argv)
+{
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+
+	rec_argc = ARRAY_SIZE(record_args) + argc + 2;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
+
+	rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
+	rec_argv[i++] = STRDUP_FAIL_EXIT(file_name);
+
+	for (j = 1; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	return cmd_record(i, rec_argv, NULL);
+}
+
+static const char * const kvm_events_report_usage[] = {
+	"perf kvm stat report [<options>]",
+	NULL
+};
+
+static const struct option kvm_events_report_options[] = {
+	OPT_STRING(0, "event", &report_event, "report event",
+		    "event for reporting: vmexit, mmio, ioport"),
+	OPT_INTEGER(0, "vcpu", &trace_vcpu,
+		    "vcpu id to report"),
+	OPT_STRING('k', "key", &sort_key, "sort-key",
+		    "key for sorting: sample(sort by samples number)"
+		    " time (sort by avg time)"),
+	OPT_END()
+};
+
+static int kvm_events_report(int argc, const char **argv)
+{
+	symbol__init();
+
+	if (argc) {
+		argc = parse_options(argc, argv,
+				     kvm_events_report_options,
+				     kvm_events_report_usage, 0);
+		if (argc)
+			usage_with_options(kvm_events_report_usage,
+					   kvm_events_report_options);
+	}
+
+	return kvm_events_report_vcpu(trace_vcpu);
+}
+
+static void print_kvm_stat_usage(void)
+{
+	printf("Usage: perf kvm stat <command>\n\n");
+
+	printf("# Available commands:\n");
+	printf("\trecord: record kvm events\n");
+	printf("\treport: report statistical data of kvm events\n");
+
+	printf("\nOtherwise, it is the alias of 'perf stat':\n");
+}
+
+static int kvm_cmd_stat(int argc, const char **argv)
+{
+	if (argc == 1) {
+		print_kvm_stat_usage();
+		goto perf_stat;
+	}
+
+	if (!strncmp(argv[1], "rec", 3))
+		return kvm_events_record(argc - 1, argv + 1);
+
+	if (!strncmp(argv[1], "rep", 3))
+		return kvm_events_report(argc - 1 , argv + 1);
+
+perf_stat:
+	return cmd_stat(argc, argv, NULL);
+}
+
 static char name_buffer[256];
 
 static const char * const kvm_usage[] = {
-	"perf kvm [<options>] {top|record|report|diff|buildid-list}",
+	"perf kvm [<options>] {top|record|report|diff|buildid-list|stat}",
 	NULL
 };
 
@@ -102,7 +930,7 @@ static int __cmd_buildid_list(int argc, const char **argv)
 	return cmd_buildid_list(i, rec_argv, NULL);
 }
 
-int cmd_kvm(int argc, const char **argv, const char *prefix __used)
+int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	perf_host  = 0;
 	perf_guest = 1;
@@ -135,6 +963,8 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __used)
 		return cmd_top(argc, argv, NULL);
 	else if (!strncmp(argv[0], "buildid-list", 12))
 		return __cmd_buildid_list(argc, argv);
+	else if (!strncmp(argv[0], "stat", 4))
+		return kvm_cmd_stat(argc, argv);
 	else
 		usage_with_options(kvm_usage, kvm_options);
 