aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-27 16:41:54 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-27 16:41:54 -0400
commite28e909c36bb5d6319953822d84df00fce7cbd18 (patch)
treea4aca971908a7a604c6fdd9a95360728f9f721b3 /tools
parentdc03c0f9d12d85286d5e3623aa96d5c2a271b8e6 (diff)
parentfabc712866435660f7fa1070e1fabe29eba5bc4c (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull second batch of KVM updates from Radim Krčmář: "General: - move kvm_stat tool from QEMU repo into tools/kvm/kvm_stat (kvm_stat had nothing to do with QEMU in the first place -- the tool only interprets debugfs) - expose per-vm statistics in debugfs and support them in kvm_stat (KVM always collected per-vm statistics, but they were summarised into global statistics) x86: - fix dynamic APICv (VMX was improperly configured and a guest could access host's APIC MSRs, CVE-2016-4440) - minor fixes ARM changes from Christoffer Dall: - new vgic reimplementation of our horribly broken legacy vgic implementation. The two implementations will live side-by-side (with the new being the configured default) for one kernel release and then we'll remove the legacy one. - fix for a non-critical issue with virtual abort injection to guests" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (70 commits) tools: kvm_stat: Add comments tools: kvm_stat: Introduce pid monitoring KVM: Create debugfs dir and stat files for each VM MAINTAINERS: Add kvm tools tools: kvm_stat: Powerpc related fixes tools: Add kvm_stat man page tools: Add kvm_stat vm monitor script kvm:vmx: more complete state update on APICv on/off KVM: SVM: Add more SVM_EXIT_REASONS KVM: Unify traced vector format svm: bitwise vs logical op typo KVM: arm/arm64: vgic-new: Synchronize changes to active state KVM: arm/arm64: vgic-new: enable build KVM: arm/arm64: vgic-new: implement mapped IRQ handling KVM: arm/arm64: vgic-new: Wire up irqfd injection KVM: arm/arm64: vgic-new: Add vgic_v2/v3_enable KVM: arm/arm64: vgic-new: vgic_init: implement map_resources KVM: arm/arm64: vgic-new: vgic_init: implement vgic_init KVM: arm/arm64: vgic-new: vgic_init: implement vgic_create KVM: arm/arm64: vgic-new: vgic_init: implement kvm_vgic_hyp_init ...
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile6
-rw-r--r--tools/kvm/kvm_stat/Makefile41
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat1127
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt63
4 files changed, 1236 insertions, 1 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 6bf68fe7dd29..f10b64d8c674 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -16,6 +16,7 @@ help:
16 @echo ' gpio - GPIO tools' 16 @echo ' gpio - GPIO tools'
17 @echo ' hv - tools used when in Hyper-V clients' 17 @echo ' hv - tools used when in Hyper-V clients'
18 @echo ' iio - IIO tools' 18 @echo ' iio - IIO tools'
19 @echo ' kvm_stat - top-like utility for displaying kvm statistics'
19 @echo ' lguest - a minimal 32-bit x86 hypervisor' 20 @echo ' lguest - a minimal 32-bit x86 hypervisor'
20 @echo ' net - misc networking tools' 21 @echo ' net - misc networking tools'
21 @echo ' perf - Linux performance measurement and analysis tool' 22 @echo ' perf - Linux performance measurement and analysis tool'
@@ -110,10 +111,13 @@ tmon_install:
110freefall_install: 111freefall_install:
111 $(call descend,laptop/$(@:_install=),install) 112 $(call descend,laptop/$(@:_install=),install)
112 113
114kvm_stat_install:
115 $(call descend,kvm/$(@:_install=),install)
116
113install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ 117install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
114 perf_install selftests_install turbostat_install usb_install \ 118 perf_install selftests_install turbostat_install usb_install \
115 virtio_install vm_install net_install x86_energy_perf_policy_install \ 119 virtio_install vm_install net_install x86_energy_perf_policy_install \
116 tmon_install freefall_install objtool_install 120 tmon_install freefall_install objtool_install kvm_stat_install
117 121
118acpi_clean: 122acpi_clean:
119 $(call descend,power/acpi,clean) 123 $(call descend,power/acpi,clean)
diff --git a/tools/kvm/kvm_stat/Makefile b/tools/kvm/kvm_stat/Makefile
new file mode 100644
index 000000000000..5b1cba57e3b3
--- /dev/null
+++ b/tools/kvm/kvm_stat/Makefile
@@ -0,0 +1,41 @@
# Build and install rules for kvm_stat: the script itself is installed as-is,
# only the man page has to be generated (from kvm_stat.txt, using a2x).
include ../../scripts/Makefile.include
include ../../scripts/utilities.mak

# Installation layout, relative to $(INSTALL_ROOT).
BINDIR=usr/bin
MANDIR=usr/share/man
MAN1DIR=$(MANDIR)/man1

MAN1=kvm_stat.1

# Name the script is installed under.  It used to be referenced without ever
# being defined; "?=" keeps any value a caller already passes in.
TARGET ?= kvm_stat

A2X=a2x
a2x_path := $(call get-executable,$(A2X))

all: man

# Print the short "A2X" banner unless make runs silently (-s) or V=1 asks for
# the full command line.  findstring takes (needle,haystack); the single
# letter options are the first word of MAKEFLAGS, long options start with "--".
ifeq ($(findstring s,$(filter-out --%,$(firstword -$(MAKEFLAGS)))),)
  ifneq ($(V),1)
    QUIET_A2X = @echo '  A2X      '$@;
  endif
endif

# The availability check is deferred to recipe expansion time, so a missing
# a2x only breaks targets that actually need the man page.
%.1: %.txt
ifeq ($(a2x_path),)
	$(error "You need to install asciidoc for man pages")
else
	$(QUIET_A2X)$(A2X) --doctype manpage --format manpage $<
endif

clean:
	rm -f $(MAN1)

man: $(MAN1)

install-man: man
	install -d -m 755 $(INSTALL_ROOT)/$(MAN1DIR)
	install -m 644 $(MAN1) $(INSTALL_ROOT)/$(MAN1DIR)

install-tools:
	install -d -m 755 $(INSTALL_ROOT)/$(BINDIR)
	install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"

install: install-tools install-man
.PHONY: all clean man install-tools install-man install
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
new file mode 100755
index 000000000000..581278c58488
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -0,0 +1,1127 @@
1#!/usr/bin/python
2#
3# top-like utility for displaying kvm statistics
4#
5# Copyright 2006-2008 Qumranet Technologies
6# Copyright 2008-2011 Red Hat, Inc.
7#
8# Authors:
9# Avi Kivity <avi@redhat.com>
10#
11# This work is licensed under the terms of the GNU GPL, version 2. See
12# the COPYING file in the top-level directory.
13"""The kvm_stat module outputs statistics about running KVM VMs
14
15Three different ways of output formatting are available:
16- as a top-like text ui
17- in a key -> value format
18- in an all keys, all values format
19
20The data is sampled from the KVM's debugfs entries and its perf events.
21"""
22
23import curses
24import sys
25import os
26import time
27import optparse
28import ctypes
29import fcntl
30import resource
31import struct
32import re
33from collections import defaultdict
34from time import sleep
35
# Exit reason name -> number table handed to ArchX86 when /proc/cpuinfo
# reports the 'vmx' flag.  The numbers become the values of the
# 'exit_reason' filter on the kvm_exit tracepoint (see get_filters()).
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'PENDING_INTERRUPT':    7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'INVPCID':              58,
}
77
# Exit reason name -> number table handed to ArchX86 when /proc/cpuinfo
# reports the 'svm' flag.  The numbers become the values of the
# 'exit_reason' filter on the kvm_exit tracepoint (see get_filters()).
SVM_EXIT_REASONS = {
    'READ_CR0':       0x000,
    'READ_CR3':       0x003,
    'READ_CR4':       0x004,
    'READ_CR8':       0x008,
    'WRITE_CR0':      0x010,
    'WRITE_CR3':      0x013,
    'WRITE_CR4':      0x014,
    'WRITE_CR8':      0x018,
    'READ_DR0':       0x020,
    'READ_DR1':       0x021,
    'READ_DR2':       0x022,
    'READ_DR3':       0x023,
    'READ_DR4':       0x024,
    'READ_DR5':       0x025,
    'READ_DR6':       0x026,
    'READ_DR7':       0x027,
    'WRITE_DR0':      0x030,
    'WRITE_DR1':      0x031,
    'WRITE_DR2':      0x032,
    'WRITE_DR3':      0x033,
    'WRITE_DR4':      0x034,
    'WRITE_DR5':      0x035,
    'WRITE_DR6':      0x036,
    'WRITE_DR7':      0x037,
    'EXCP_BASE':      0x040,
    'INTR':           0x060,
    'NMI':            0x061,
    'SMI':            0x062,
    'INIT':           0x063,
    'VINTR':          0x064,
    'CR0_SEL_WRITE':  0x065,
    'IDTR_READ':      0x066,
    'GDTR_READ':      0x067,
    'LDTR_READ':      0x068,
    'TR_READ':        0x069,
    'IDTR_WRITE':     0x06a,
    'GDTR_WRITE':     0x06b,
    'LDTR_WRITE':     0x06c,
    'TR_WRITE':       0x06d,
    'RDTSC':          0x06e,
    'RDPMC':          0x06f,
    'PUSHF':          0x070,
    'POPF':           0x071,
    'CPUID':          0x072,
    'RSM':            0x073,
    'IRET':           0x074,
    'SWINT':          0x075,
    'INVD':           0x076,
    'PAUSE':          0x077,
    'HLT':            0x078,
    'INVLPG':         0x079,
    'INVLPGA':        0x07a,
    'IOIO':           0x07b,
    'MSR':            0x07c,
    'TASK_SWITCH':    0x07d,
    'FERR_FREEZE':    0x07e,
    'SHUTDOWN':       0x07f,
    'VMRUN':          0x080,
    'VMMCALL':        0x081,
    'VMLOAD':         0x082,
    'VMSAVE':         0x083,
    'STGI':           0x084,
    'CLGI':           0x085,
    'SKINIT':         0x086,
    'RDTSCP':         0x087,
    'ICEBP':          0x088,
    'WBINVD':         0x089,
    'MONITOR':        0x08a,
    'MWAIT':          0x08b,
    'MWAIT_COND':     0x08c,
    'XSETBV':         0x08d,
    'NPF':            0x400,
}
152
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Name -> number table used as ArchA64.exit_reasons, i.e. as the values of
# the 'exit_reason' filter on the kvm_exit tracepoint (see get_filters()).
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFI':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'CP14_64':      0x0C,
    'ILL_ISS':      0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'IABT':         0x20,
    'IABT_HYP':     0x21,
    'PC_ALIGN':     0x22,
    'DABT':         0x24,
    'DABT_HYP':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT':      0x30,
    'BREAKPT_HYP':  0x31,
    'SOFTSTP':      0x32,
    'SOFTSTP_HYP':  0x33,
    'WATCHPT':      0x34,
    'WATCHPT_HYP':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}
191
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Name -> number table used on every architecture as the values of the
# 'reason' filter on the kvm_userspace_exit tracepoint (see get_filters()).
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
}
220
# Default ioctl request numbers issued on perf event file descriptors by the
# Event class (set filter / enable / disable / reset).  The Arch subclasses
# expose this table as their 'ioctl_numbers' attribute; ArchPPC overrides
# every entry.
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}
227
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance matching the running host.

        The machine name reported by os.uname() selects ppc, aarch64 or
        s390.  Every other machine is treated as x86, where the first
        'flags' line of /proc/cpuinfo decides between the VMX and the
        SVM exit reason table.

        Returns None if that line lists neither 'vmx' nor 'svm', or if
        /proc/cpuinfo has no 'flags' line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        elif machine.startswith('aarch64'):
            return ArchA64()
        elif machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The context manager makes sure the file is closed again, no
        # matter which of the return statements below is taken.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # Only the first 'flags' line is inspected.
                return None
        return None
257
class ArchX86(Arch):
    """Architecture data for x86 hosts.

    The exit reason table is passed in by the caller, as it differs
    between VMX and SVM capable processors.

    """
    def __init__(self, exit_reasons):
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 298
263
class ArchPPC(Arch):
    """Architecture data for PowerPC hosts.

    All perf ioctl numbers differ from the defaults in IOCTL_NUMBERS and
    no exit reason names are provided.

    """
    def __init__(self):
        # Number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 319

        # Work on a private copy: assigning into IOCTL_NUMBERS itself
        # would silently change the table every other Arch class shares.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
        self.exit_reasons = {}
277
class ArchA64(Arch):
    """Architecture data for aarch64 hosts."""
    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 241
283
class ArchS390(Arch):
    """Architecture data for s390 hosts.

    No exit reason table is available, so get_filters() does not create
    a kvm_exit filter for this architecture.

    """
    def __init__(self):
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        # Number handed to syscall() by perf_event_open().
        self.sc_perf_evt_open = 331
289
# Architecture description shared by the whole module.  Note that
# Arch.get_arch() returns None on an x86 host whose cpu flags contain
# neither 'vmx' nor 'svm'.
ARCH = Arch.get_arch()
291
292
def walkdir(path):
    """Returns the top level os.walk() entry of the given directory.

    The result is the usual os.walk() 3-tuple (dirpath, dirnames,
    filenames), restricted to 'path' itself; sub directories are not
    descended into.
    """
    walker = os.walk(path)
    top_level = next(walker)
    return top_level
300
301
def parse_int_list(list_string):
    """Returns an int list from a string of comma separated integers and
    integer ranges.

    A range is written as 'first-last' and includes both ends, so
    '0-2,5' yields [0, 1, 2, 5].  Surrounding whitespace (e.g. the
    newline of a line read from sysfs) is ignored and empty members are
    skipped, so an empty string yields an empty list.

    Raises ValueError for members that are not integers or ranges.

    """
    integers = []

    for member in list_string.split(','):
        member = member.strip()
        if not member:
            # Empty input or a stray comma, nothing to convert.
            continue

        if '-' not in member:
            integers.append(int(member))
        else:
            int_range = member.split('-')
            integers.extend(range(int(int_range[0]),
                                  int(int_range[1]) + 1))

    return integers
317
318
def get_online_cpus():
    """Returns the ids of all online cpus as a list of integers.

    The ids are parsed from the first line of
    /sys/devices/system/cpu/online.
    """
    with open('/sys/devices/system/cpu/online') as online_file:
        first_line = online_file.readline()
    return parse_int_list(first_line)
324
325
def get_filters():
    """Returns the filterable trace events.

    The result maps a trace event name to a tuple of the identifier
    that is filtered on and a dict of the 'name: value' pairs that can
    be filtered for.

    A filter is set on an event via an ioctl and normally has the
    format identifier==value.  A separate event is created per filter,
    so that the filtered events can be told apart.

    The kvm_exit filter is only offered if the architecture provides
    exit reasons.

    """
    filters = {'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS)}

    arch_exit_reasons = ARCH.exit_reasons
    if arch_exit_reasons:
        filters['kvm_exit'] = ('exit_reason', arch_exit_reasons)

    return filters
341
# Handle to the C library; perf_event_open() issues its system call through
# libc's generic syscall() function.  use_errno=True makes the resulting
# errno available via ctypes.get_errno().
libc = ctypes.CDLL('libc.so.6', use_errno=True)
syscall = libc.syscall
344
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Presets the struct for a tracepoint event read in group format."""
        # Name the class explicitly: super(self.__class__, self) would
        # recurse endlessly as soon as this class is subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
372
def perf_event_open(attr, pid, cpu, group_fd, flags):
    """Thin wrapper around the sys_perf_evt_open() syscall.

    Sets up a performance event and returns its file descriptor, or -1
    on error.

    The syscall number is taken from the architecture data, the
    remaining syscall arguments are:
    - attr: struct perf_event_attr, passed by pointer
    - pid: pid to monitor or -1 to monitor all pids
    - cpu: cpu number to monitor or -1 to monitor all cpus
    - group_fd: file descriptor of the group leader or -1 to create a
      group
    - flags

    """
    syscall_nr = ARCH.sc_perf_evt_open
    attr_ptr = ctypes.pointer(attr)
    c_pid = ctypes.c_int(pid)
    c_cpu = ctypes.c_int(cpu)
    c_group_fd = ctypes.c_int(group_fd)
    c_flags = ctypes.c_long(flags)
    return syscall(syscall_nr, attr_ptr, c_pid, c_cpu, c_group_fd, c_flags)
391
# Value stored in perf_event_attr.type by the perf_event_attr constructor.
PERF_TYPE_TRACEPOINT = 2
# Value stored in perf_event_attr.read_format; Group.read() relies on the
# resulting layout (event count followed by one value per event).
PERF_FORMAT_GROUP = 1 << 3

# debugfs directories the tracepoint ids and the KVM statistics are read from.
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
397
class Group(object):
    """A perf event group, i.e. an ordered list of events.

    The first event that is added acts as the group leader.

    """

    def __init__(self):
        self.events = []

    def add_event(self, event):
        """Appends an event to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict of 'event name: value' for all events of the
        group.

        All values are fetched with a single read on the file
        descriptor of the group leader, see perf_event_open(2) for
        details.

        With the event configuration used here the data has the layout:
        struct read_format {
            u64 nr;            /* The number of events */
            struct {
                u64 value;     /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        # Skip the 8 byte event counter, then one u64 per event.
        layout = 'xxxxxxxx' + 'Q' * count
        leader_fd = self.events[0].fd
        raw = os.read(leader_fd, 8 * (1 + count))
        values = struct.unpack(layout, raw)
        names = [event.name for event in self.events]
        return dict(zip(names, values))
429
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Registers the event in the kernel.

        - name: name under which the event's value is reported
        - group: Group the event will belong to; its first event is
          used as group leader
        - trace_cpu / trace_pid: passed to perf_event_open()
        - trace_point: tracepoint name below events/<trace_set>/
        - trace_filter: optional filter string, set via ioctl
        - trace_set: tracing subsystem directory, 'kvm' by default

        Raises OSError if the perf event could not be opened.

        """
        self.name = name
        self.fd = None
        self.setup_event(group, trace_cpu, trace_pid, trace_point,
                         trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # Compare against None, as 0 is a valid file descriptor too.
        if self.fd is not None:
            os.close(self.fd)

    def setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct.

        The config field is set to the tracepoint's id, which is read
        from the 'id' file of the tracepoint's tracing directory.

        """

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
                    trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall fails.

        """

        event_attr = self.setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = perf_event_open(event_attr, trace_pid,
                             trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        # Remember the descriptor before touching it any further, so
        # that __del__ closes it even if setting the filter fails.
        self.fd = fd

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
512
class TracepointProvider(object):
    """Provides the stats class with data from the kvm tracepoints.

    Owns the perf events and event groups the data is read from.

    """
    def __init__(self):
        self.group_leaders = []
        self.filters = get_filters()
        self._fields = self.get_available_fields()
        self._pid = 0

    def get_available_fields(self):
        """Returns the list of available events, filtered ones being
        named 'event name(filter name)'.

        Every available event has a directory below
        /sys/kernel/debug/tracing/events/kvm, so the directory listing
        equals the list of available events.

        Events with an entry in self.filters (e.g. the vm exit reasons)
        can be restricted to specific values.  For each such value an
        additional field is appended, using the format:
            event name(filter name)

        """
        events_dir = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = walkdir(events_dir)[1]

        filtered = []
        for field in fields:
            if field not in self.filters:
                continue
            filter_values = self.filters[field][1]
            filtered.extend(field + '(' + name + ')'
                            for name in filter_values)

        return fields + filtered

    def setup_traces(self):
        """Creates one event group per monitored thread or cpu, holding
        one event for each selected field."""
        if self._pid > 0:
            # qemu starts a thread for each vcpu, hence all threads of
            # the monitored pid have to be traced.
            task_dir = os.path.join('/proc', str(self._pid), 'task')
            groupids = walkdir(task_dir)[1]
        else:
            groupids = get_online_cpus()

        # One descriptor is needed per event.  The constant is a buffer
        # for python libs, std streams and other files that the script
        # opens.
        newlim = len(groupids) * len(self._fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Increasing the hard limit needs CAP_SYS_RESOURCE.
                limits = (newlim, newlim)
            else:
                # Raising the soft limit is sufficient.
                limits = (newlim, hardlim)
            resource.setrlimit(resource.RLIMIT_NOFILE, limits)

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for groupid in groupids:
            group = Group()
            for name in self._fields:
                tracepoint = name
                tracefilter = None

                # Fields of the form 'event(value name)' are turned
                # into the event and a NUL terminated filter string.
                match = re.match(r'(.*)\((.*)\)', name)
                if match:
                    tracepoint, sub = match.groups()
                    filter_id, filter_values = self.filters[tracepoint]
                    tracefilter = ('%s==%d\0' %
                                   (filter_id, filter_values[sub]))

                # From perf_event_open(2):
                # pid > 0 and cpu == -1
                # This measures the specified process/thread on any CPU.
                #
                # pid == -1 and cpu >= 0
                # This measures all processes/threads on the specified CPU.
                if self._pid == 0:
                    trace_cpu = groupid
                    trace_pid = -1
                else:
                    trace_cpu = -1
                    trace_pid = int(groupid)

                event = Event(name=name,
                              group=group,
                              trace_cpu=trace_cpu,
                              trace_pid=trace_pid,
                              trace_point=tracepoint,
                              trace_filter=tracefilter)
                group.add_event(event)

            self.group_leaders.append(group)

    def available_fields(self):
        """Returns the same list as get_available_fields()."""
        return self.get_available_fields()

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Resets and enables the wanted events, disables all others."""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                elif index != 0:
                    # The group leader (index 0) is never disabled, as
                    # that would disable all of its events.
                    event.disable()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Switches to another monitored pid by creating new traces."""
        self._pid = pid
        # Dropping the references lets the garbage collector dispose
        # of the old Event/Group objects and their open files.
        self.group_leaders = []
        self.setup_traces()
        self.fields = self._fields

    def read(self):
        """Returns 'event name: current value' for all enabled events,
        summed up over all groups."""
        totals = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name not in self._fields:
                    continue
                totals[name] += val
        return totals
652
class DebugfsProvider(object):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self):
        self._fields = self.get_available_fields()
        self._pid = 0
        # '' addresses the files directly below the kvm debugfs folder,
        # i.e. the statistics of all VMs.
        self.paths = ['']
        self.do_read = True

    def get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return walkdir(PATH_DEBUGFS_KVM)[2]

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        self._fields = fields

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Selects the debugfs directories that are read for 'pid'.

        A pid of 0 selects the files directly below the kvm debugfs
        folder.  For any other pid the VM directories whose name starts
        with '<pid>-' are selected; reading is switched off while the
        kvm debugfs folder contains no VM directories at all.

        """
        # Always remember the pid, also when switching back to 0.
        self._pid = pid

        if pid != 0:
            vms = walkdir(PATH_DEBUGFS_KVM)[1]
            # Without any VM directory there is nothing to filter on.
            self.do_read = len(vms) != 0

            # Match the leading pid only, a plain substring test would
            # let '12-' select the directories of pid 112 as well.
            prefix = '{0}-'.format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = ['']
            self.do_read = True

    def read(self):
        """Returns a dict with format:'file name / field -> current value'.

        The values of a field are summed up over all selected paths.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results

        for path in self.paths:
            for field in self._fields:
                results[field] = results.get(field, 0) \
                                 + self.read_field(field, path)

        return results

    def read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM,
                                   path,
                                   field)) as stat_file:
                return int(stat_file.read())
        except IOError:
            return 0
720
class Stats(object):
    """Collects the data of all data providers.

    Additionally it hands the fields and pid filters down to the
    providers.

    """
    def __init__(self, providers, pid, fields=None):
        self.providers = providers
        self._pid_filter = pid
        self._fields_filter = fields
        self.values = {}
        self.update_provider_pid()
        self.update_provider_filters()

    def update_provider_filters(self):
        """Hands the fields filter down to the providers.

        Each provider gets those of its available fields that match the
        filter regex, or all of them if no filter is set.

        """
        # The providers reset their counters when the fields change,
        # hence the cached values are outdated as well.
        self.values = {}
        for provider in self.providers:
            selected = []
            for key in provider.get_available_fields():
                unfiltered = not self._fields_filter
                if unfiltered or re.match(self._fields_filter,
                                          key) is not None:
                    selected.append(key)
            provider.fields = selected

    def update_provider_pid(self):
        """Hands the pid filter down to the providers."""
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        self._fields_filter = fields_filter
        self.update_provider_filters()

    @property
    def pid_filter(self):
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        self._pid_filter = pid
        self.values = {}
        self.update_provider_pid()

    def get(self):
        """Returns a dict of field -> (value, delta to last value) for
        the data of all providers.

        Fields without a previous value are compared against 0.

        """
        for provider in self.providers:
            new = provider.read()
            for key in provider.fields:
                last_value = self.values.get(key, (0, 0))[0]
                value = new.get(key, 0)
                self.values[key] = (value, value - last_value)
        return self.values
788
# Column widths of the text ui: Tui.refresh() advances the column by
# LABEL_WIDTH after the event name and by NUMBER_WIDTH after the total.
LABEL_WIDTH = 40
NUMBER_WIDTH = 10
791
class Tui(object):
    """Draws the top-like text ui by means of curses."""
    def __init__(self, stats):
        self.stats = stats
        self.screen = None
        self.drilldown = False
        self.update_drilldown()

    def __enter__(self):
        """Sets up curses for later use. Based on the curses.wrapper
        implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # An error returned by the C start_color() can be ignored, the
        # curses module is just a bit over-conscientious about it.
        try:
            curses.start_color()
        except:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Brings the terminal back to its normal state. Based on the
        curses.wrapper implementation from the Python standard library."""
        if self.screen:
            self.screen.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()

    def update_drilldown(self):
        """Toggles the filter that only lets fields without braces pass.

        A fields filter other than the toggled one is left untouched.

        """
        no_braces = r'^[^\(]*$'
        current = self.stats.fields_filter
        if not current:
            self.stats.fields_filter = no_braces

        elif current == no_braces:
            self.stats.fields_filter = None

    def update_pid(self, pid):
        """Hands the pid selection over to the stats object."""
        self.stats.pid_filter = pid

    def refresh(self, sleeptime):
        """Redraws the statistics table.

        The 'Current' column shows the delta divided by sleeptime.

        """
        screen = self.screen
        screen.erase()
        if self.stats.pid_filter > 0:
            title = 'kvm statistics - pid {0}'.format(self.stats.pid_filter)
            screen.addstr(0, 0, title, curses.A_BOLD)
        else:
            screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)

        # Header line, the number columns are right aligned.
        total_end = 1 + LABEL_WIDTH + NUMBER_WIDTH
        screen.addstr(2, 1, 'Event')
        screen.addstr(2, total_end - len('Total'), 'Total')
        screen.addstr(2, total_end + 8 - len('Current'), 'Current')

        stats = self.stats.get()

        def sortkey(name):
            # Highest delta first, ties are broken by the highest total.
            total, delta = stats[name][0], stats[name][1]
            return (-delta if delta else 0, -total)

        row = 3
        for key in sorted(stats.keys(), key=sortkey):

            if row >= self.screen.getmaxyx()[0]:
                break
            values = stats[key]
            # Because of the sort order everything that follows is
            # expected to be zero as well.
            if not values[0] and not values[1]:
                break
            screen.addstr(row, 1, key)
            screen.addstr(row, 1 + LABEL_WIDTH, '%10d' % (values[0],))
            if values[1] is not None:
                screen.addstr(row, 1 + LABEL_WIDTH + NUMBER_WIDTH,
                              '%8d' % (values[1] / sleeptime,))
            row += 1
        screen.refresh()

    def show_filter_selection(self):
        """Draws the filter selection mask.

        Keeps asking until a valid regex was entered, which is then set
        as fields filter.  An empty input leaves the filter unchanged.

        """
        screen = self.screen
        while True:
            screen.erase()
            screen.addstr(0, 0,
                          "Show statistics for events matching a regex.",
                          curses.A_BOLD)
            current = "Current regex: {0}".format(self.stats.fields_filter)
            screen.addstr(2, 0, current)
            screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = screen.getstr()
            curses.noecho()
            if len(regex) == 0:
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                return
            except re.error:
                continue

    def show_vm_selection(self):
        """Draws the PID selection mask.

        Keeps asking until 0 or a pid with a directory below /proc was
        entered.

        """
        screen = self.screen
        while True:
            screen.erase()
            screen.addstr(0, 0,
                          'Show statistics for specific pid.',
                          curses.A_BOLD)
            screen.addstr(1, 0,
                          'This might limit the shown data to the trace '
                          'statistics.')

            curses.echo()
            screen.addstr(3, 0, "Pid [0 or pid]: ")
            pid = screen.getstr()
            curses.noecho()

            try:
                pid = int(pid)

                if pid != 0:
                    proc_dir = os.path.join('/proc/', str(pid))
                    if not os.path.isdir(proc_dir):
                        continue

                self.update_pid(pid)
                break

            except ValueError:
                continue

    def show_stats(self):
        """Redraws the screen periodically and handles the user input.

        Keys: 'x' toggles the drilldown, 'f' and 'p' open the filter
        and pid selection, 'q' quits.

        """
        # Only the first refresh happens after a short delay.
        sleeptime = 0.25
        while True:
            self.refresh(sleeptime)
            curses.halfdelay(int(sleeptime * 10))
            sleeptime = 3
            try:
                char = self.screen.getkey()
                if char == 'x':
                    self.drilldown = not self.drilldown
                    self.update_drilldown()
                elif char == 'q':
                    break
                elif char == 'f':
                    self.show_filter_selection()
                elif char == 'p':
                    self.show_vm_selection()
            except KeyboardInterrupt:
                break
            except curses.error:
                continue
962
def batch(stats):
    """Prints one 'key, value, delta' line per field.

    The statistics are sampled twice, one second apart, so that the
    printed deltas cover that second.

    """
    stats.get()
    time.sleep(1)
    sample = stats.get()
    for key in sorted(sample.keys()):
        values = sample[key]
        line = '%-42s%10d%10d' % (key, values[0], values[1])
        sys.stdout.write(line + '\n')
971
def log(stats):
    """Prints the statistics as a repeating key line, each followed by
    multiple value lines.

    A line with the deltas of all fields is printed every second, the
    line with the keys is repeated every 20 value lines.  This function
    does not return.

    """
    keys = sorted(stats.get())
    banner_repeat = 20

    def banner():
        # All keys on one line, separated by a single space.
        sys.stdout.write(' '.join('%s' % k for k in keys) + '\n')

    def statline():
        s = stats.get()
        sys.stdout.write(' '.join(' %9d' % s[k][1] for k in keys) + '\n')

    line = 0
    while True:
        time.sleep(1)
        if line % banner_repeat == 0:
            banner()
        statline()
        line += 1
992
def get_options():
    """Returns processed program arguments.

    The arguments are taken from sys.argv.  The result is the optparse
    values object with the attributes once, log, tracepoints, debugfs
    (all booleans), fields (regex string or None) and pid (int, 0 if
    not given).

    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    /sys/kernel/debug/kvm
    /sys/kernel/debug/tracing/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.
"""

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that prints the description without re-wrapping
        it, so that the line breaks of the text above are kept."""
        def format_description(self, description):
            if description:
                return description + "\n"
            else:
                return ""

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())
    optparser.add_option('-1', '--once', '--batch',
                         action='store_true',
                         default=False,
                         dest='once',
                         help='run in batch mode for one second',
                         )
    optparser.add_option('-l', '--log',
                         action='store_true',
                         default=False,
                         dest='log',
                         help='run in logging mode (like vmstat)',
                         )
    optparser.add_option('-t', '--tracepoints',
                         action='store_true',
                         default=False,
                         dest='tracepoints',
                         help='retrieve statistics from tracepoints',
                         )
    optparser.add_option('-d', '--debugfs',
                         action='store_true',
                         default=False,
                         dest='debugfs',
                         help='retrieve statistics from debugfs',
                         )
    optparser.add_option('-f', '--fields',
                         action='store',
                         default=None,
                         dest='fields',
                         help='fields to display (regex)',
                         )
    optparser.add_option('-p', '--pid',
                         action='store',
                         default=0,
                         type='int',
                         dest='pid',
                         help='restrict statistics to pid',
                         )
    # The program name in sys.argv[0] ends up in the ignored list of
    # positional arguments.
    (options, _) = optparser.parse_args(sys.argv)
    return options
1062
def get_providers(options):
    """Returns a list of data providers depending on the passed options.

    A TracepointProvider is created when options.tracepoints is set and a
    DebugfsProvider when options.debugfs is set, in that order.  If
    neither is set, a single TracepointProvider is returned.
    """
    candidates = ((options.tracepoints, TracepointProvider),
                  (options.debugfs, DebugfsProvider))
    providers = [factory() for wanted, factory in candidates if wanted]

    # Nothing requested explicitly: use the tracepoint provider.
    if not providers:
        providers = [TracepointProvider()]

    return providers
1075
def check_access(options):
    """Exits if the current user can't access all needed directories.

    Checks, in this order:
      1. /sys/kernel/debug exists, otherwise exit(1).
      2. PATH_DEBUGFS_KVM exists, otherwise exit(1).
      3. PATH_DEBUGFS_TRACING exists if tracepoints are going to be used
         (options.tracepoints set, or options.debugfs not set).  When it
         is missing and tracepoints were requested explicitly, exit(1);
         otherwise fall back to debugfs by setting options.debugfs and
         pause for five seconds after printing the notice.

    options -- parsed options; tracepoints and debugfs are read,
               debugfs may be set to True.

    Returns the (possibly modified) options object.
    """
    if not os.path.exists('/sys/kernel/debug'):
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints
                                                     or not options.debugfs):
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        if options.tracepoints:
            sys.exit(1)

        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        # Was a bare sleep(5): the rest of the file reaches the function
        # through the time module, so the bare name raised NameError
        # right here instead of pausing.
        time.sleep(5)

    return options
1104
def main():
    """Program entry: parse options, verify access, run the chosen mode.

    Exits with an error message when a pid greater than zero was given
    that has no directory below /proc/.  Otherwise the statistics are
    shown in logging mode (options.log), in batch mode (options.once) or
    in the interactive text UI (neither set); options.log wins when both
    are set.
    """
    options = check_access(get_options())

    pid = options.pid
    if pid > 0 and not os.path.isdir(os.path.join('/proc/', str(pid))):
        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
        sys.exit('Specified pid does not exist.')

    stats = Stats(get_providers(options), options.pid, fields=options.fields)

    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()


if __name__ == '__main__':
    main()
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
new file mode 100644
index 000000000000..b92a153d7115
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -0,0 +1,63 @@
1kvm_stat(1)
2===========
3
4NAME
5----
6kvm_stat - Report KVM kernel module event counters
7
8SYNOPSIS
9--------
10[verse]
11'kvm_stat' [OPTION]...
12
13DESCRIPTION
14-----------
15kvm_stat prints counts of KVM kernel module trace events. These events signify
16state transitions such as guest mode entry and exit.
17
18This tool is useful for observing guest behavior from the host perspective.
19Conclusions about performance or buggy behavior can often be drawn from the
20output.
21
22The set of KVM kernel module trace events may be specific to the kernel version
23or architecture. It is best to check the KVM kernel module source code for the
24meaning of events.
25
26OPTIONS
27-------
28-1::
29--once::
30--batch::
31 run in batch mode for one second
32
33-l::
34--log::
35 run in logging mode (like vmstat)
36
37-t::
38--tracepoints::
39 retrieve statistics from tracepoints
40
41-d::
42--debugfs::
43 retrieve statistics from debugfs
44
45-p<pid>::
46--pid=<pid>::
47 limit statistics to one virtual machine (pid)
48
49-f<fields>::
50--fields=<fields>::
51 fields to display (regex)
52
53-h::
54--help::
55 show help message
56
57SEE ALSO
58--------
59'perf'(1), 'trace-cmd'(1)
60
61AUTHOR
62------
63Stefan Hajnoczi <stefanha@redhat.com>