diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-27 16:41:54 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-27 16:41:54 -0400 |
commit | e28e909c36bb5d6319953822d84df00fce7cbd18 (patch) | |
tree | a4aca971908a7a604c6fdd9a95360728f9f721b3 /tools | |
parent | dc03c0f9d12d85286d5e3623aa96d5c2a271b8e6 (diff) | |
parent | fabc712866435660f7fa1070e1fabe29eba5bc4c (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull second batch of KVM updates from Radim Krčmář:
"General:
- move kvm_stat tool from QEMU repo into tools/kvm/kvm_stat (kvm_stat
had nothing to do with QEMU in the first place -- the tool only
interprets debugfs)
- expose per-vm statistics in debugfs and support them in kvm_stat
(KVM always collected per-vm statistics, but they were summarised
into global statistics)
x86:
- fix dynamic APICv (VMX was improperly configured and a guest could
access host's APIC MSRs, CVE-2016-4440)
- minor fixes
ARM changes from Christoffer Dall:
- new vgic reimplementation of our horribly broken legacy vgic
implementation. The two implementations will live side-by-side
(with the new being the configured default) for one kernel release
and then we'll remove the legacy one.
- fix for a non-critical issue with virtual abort injection to guests"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (70 commits)
tools: kvm_stat: Add comments
tools: kvm_stat: Introduce pid monitoring
KVM: Create debugfs dir and stat files for each VM
MAINTAINERS: Add kvm tools
tools: kvm_stat: Powerpc related fixes
tools: Add kvm_stat man page
tools: Add kvm_stat vm monitor script
kvm:vmx: more complete state update on APICv on/off
KVM: SVM: Add more SVM_EXIT_REASONS
KVM: Unify traced vector format
svm: bitwise vs logical op typo
KVM: arm/arm64: vgic-new: Synchronize changes to active state
KVM: arm/arm64: vgic-new: enable build
KVM: arm/arm64: vgic-new: implement mapped IRQ handling
KVM: arm/arm64: vgic-new: Wire up irqfd injection
KVM: arm/arm64: vgic-new: Add vgic_v2/v3_enable
KVM: arm/arm64: vgic-new: vgic_init: implement map_resources
KVM: arm/arm64: vgic-new: vgic_init: implement vgic_init
KVM: arm/arm64: vgic-new: vgic_init: implement vgic_create
KVM: arm/arm64: vgic-new: vgic_init: implement kvm_vgic_hyp_init
...
Diffstat (limited to 'tools')
-rw-r--r-- | tools/Makefile | 6 | ||||
-rw-r--r-- | tools/kvm/kvm_stat/Makefile | 41 | ||||
-rwxr-xr-x | tools/kvm/kvm_stat/kvm_stat | 1127 | ||||
-rw-r--r-- | tools/kvm/kvm_stat/kvm_stat.txt | 63 |
4 files changed, 1236 insertions, 1 deletions
diff --git a/tools/Makefile b/tools/Makefile index 6bf68fe7dd29..f10b64d8c674 100644 --- a/tools/Makefile +++ b/tools/Makefile | |||
@@ -16,6 +16,7 @@ help: | |||
16 | @echo ' gpio - GPIO tools' | 16 | @echo ' gpio - GPIO tools' |
17 | @echo ' hv - tools used when in Hyper-V clients' | 17 | @echo ' hv - tools used when in Hyper-V clients' |
18 | @echo ' iio - IIO tools' | 18 | @echo ' iio - IIO tools' |
19 | @echo ' kvm_stat - top-like utility for displaying kvm statistics' | ||
19 | @echo ' lguest - a minimal 32-bit x86 hypervisor' | 20 | @echo ' lguest - a minimal 32-bit x86 hypervisor' |
20 | @echo ' net - misc networking tools' | 21 | @echo ' net - misc networking tools' |
21 | @echo ' perf - Linux performance measurement and analysis tool' | 22 | @echo ' perf - Linux performance measurement and analysis tool' |
@@ -110,10 +111,13 @@ tmon_install: | |||
110 | freefall_install: | 111 | freefall_install: |
111 | $(call descend,laptop/$(@:_install=),install) | 112 | $(call descend,laptop/$(@:_install=),install) |
112 | 113 | ||
114 | kvm_stat_install: | ||
115 | $(call descend,kvm/$(@:_install=),install) | ||
116 | |||
113 | install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ | 117 | install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ |
114 | perf_install selftests_install turbostat_install usb_install \ | 118 | perf_install selftests_install turbostat_install usb_install \ |
115 | virtio_install vm_install net_install x86_energy_perf_policy_install \ | 119 | virtio_install vm_install net_install x86_energy_perf_policy_install \ |
116 | tmon_install freefall_install objtool_install | 120 | tmon_install freefall_install objtool_install kvm_stat_install |
117 | 121 | ||
118 | acpi_clean: | 122 | acpi_clean: |
119 | $(call descend,power/acpi,clean) | 123 | $(call descend,power/acpi,clean) |
diff --git a/tools/kvm/kvm_stat/Makefile b/tools/kvm/kvm_stat/Makefile new file mode 100644 index 000000000000..5b1cba57e3b3 --- /dev/null +++ b/tools/kvm/kvm_stat/Makefile | |||
@@ -0,0 +1,41 @@ | |||
1 | include ../../scripts/Makefile.include | ||
2 | include ../../scripts/utilities.mak | ||
3 | BINDIR=usr/bin | ||
4 | MANDIR=usr/share/man | ||
5 | MAN1DIR=$(MANDIR)/man1 | ||
6 | |||
7 | MAN1=kvm_stat.1 | ||
8 | |||
9 | A2X=a2x | ||
10 | a2x_path := $(call get-executable,$(A2X)) | ||
11 | |||
12 | all: man | ||
13 | |||
14 | ifneq ($(findstring $(MAKEFLAGS),s),s) | ||
15 | ifneq ($(V),1) | ||
16 | QUIET_A2X = @echo ' A2X '$@; | ||
17 | endif | ||
18 | endif | ||
19 | |||
20 | %.1: %.txt | ||
21 | ifeq ($(a2x_path),) | ||
22 | $(error "You need to install asciidoc for man pages") | ||
23 | else | ||
24 | $(QUIET_A2X)$(A2X) --doctype manpage --format manpage $< | ||
25 | endif | ||
26 | |||
27 | clean: | ||
28 | rm -f $(MAN1) | ||
29 | |||
30 | man: $(MAN1) | ||
31 | |||
32 | install-man: man | ||
33 | install -d -m 755 $(INSTALL_ROOT)/$(MAN1DIR) | ||
34 | install -m 644 kvm_stat.1 $(INSTALL_ROOT)/$(MAN1DIR) | ||
35 | |||
36 | install-tools: | ||
37 | install -d -m 755 $(INSTALL_ROOT)/$(BINDIR) | ||
38 | install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" | ||
39 | |||
40 | install: install-tools install-man | ||
41 | .PHONY: all clean man install-tools install-man install | ||
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat new file mode 100755 index 000000000000..581278c58488 --- /dev/null +++ b/tools/kvm/kvm_stat/kvm_stat | |||
@@ -0,0 +1,1127 @@ | |||
1 | #!/usr/bin/python | ||
2 | # | ||
3 | # top-like utility for displaying kvm statistics | ||
4 | # | ||
5 | # Copyright 2006-2008 Qumranet Technologies | ||
6 | # Copyright 2008-2011 Red Hat, Inc. | ||
7 | # | ||
8 | # Authors: | ||
9 | # Avi Kivity <avi@redhat.com> | ||
10 | # | ||
11 | # This work is licensed under the terms of the GNU GPL, version 2. See | ||
12 | # the COPYING file in the top-level directory. | ||
13 | """The kvm_stat module outputs statistics about running KVM VMs | ||
14 | |||
15 | Three different ways of output formatting are available: | ||
16 | - as a top-like text ui | ||
17 | - in a key -> value format | ||
18 | - in an all keys, all values format | ||
19 | |||
20 | The data is sampled from the KVM's debugfs entries and its perf events. | ||
21 | """ | ||
22 | |||
23 | import curses | ||
24 | import sys | ||
25 | import os | ||
26 | import time | ||
27 | import optparse | ||
28 | import ctypes | ||
29 | import fcntl | ||
30 | import resource | ||
31 | import struct | ||
32 | import re | ||
33 | from collections import defaultdict | ||
34 | from time import sleep | ||
35 | |||
36 | VMX_EXIT_REASONS = { | ||
37 | 'EXCEPTION_NMI': 0, | ||
38 | 'EXTERNAL_INTERRUPT': 1, | ||
39 | 'TRIPLE_FAULT': 2, | ||
40 | 'PENDING_INTERRUPT': 7, | ||
41 | 'NMI_WINDOW': 8, | ||
42 | 'TASK_SWITCH': 9, | ||
43 | 'CPUID': 10, | ||
44 | 'HLT': 12, | ||
45 | 'INVLPG': 14, | ||
46 | 'RDPMC': 15, | ||
47 | 'RDTSC': 16, | ||
48 | 'VMCALL': 18, | ||
49 | 'VMCLEAR': 19, | ||
50 | 'VMLAUNCH': 20, | ||
51 | 'VMPTRLD': 21, | ||
52 | 'VMPTRST': 22, | ||
53 | 'VMREAD': 23, | ||
54 | 'VMRESUME': 24, | ||
55 | 'VMWRITE': 25, | ||
56 | 'VMOFF': 26, | ||
57 | 'VMON': 27, | ||
58 | 'CR_ACCESS': 28, | ||
59 | 'DR_ACCESS': 29, | ||
60 | 'IO_INSTRUCTION': 30, | ||
61 | 'MSR_READ': 31, | ||
62 | 'MSR_WRITE': 32, | ||
63 | 'INVALID_STATE': 33, | ||
64 | 'MWAIT_INSTRUCTION': 36, | ||
65 | 'MONITOR_INSTRUCTION': 39, | ||
66 | 'PAUSE_INSTRUCTION': 40, | ||
67 | 'MCE_DURING_VMENTRY': 41, | ||
68 | 'TPR_BELOW_THRESHOLD': 43, | ||
69 | 'APIC_ACCESS': 44, | ||
70 | 'EPT_VIOLATION': 48, | ||
71 | 'EPT_MISCONFIG': 49, | ||
72 | 'WBINVD': 54, | ||
73 | 'XSETBV': 55, | ||
74 | 'APIC_WRITE': 56, | ||
75 | 'INVPCID': 58, | ||
76 | } | ||
77 | |||
78 | SVM_EXIT_REASONS = { | ||
79 | 'READ_CR0': 0x000, | ||
80 | 'READ_CR3': 0x003, | ||
81 | 'READ_CR4': 0x004, | ||
82 | 'READ_CR8': 0x008, | ||
83 | 'WRITE_CR0': 0x010, | ||
84 | 'WRITE_CR3': 0x013, | ||
85 | 'WRITE_CR4': 0x014, | ||
86 | 'WRITE_CR8': 0x018, | ||
87 | 'READ_DR0': 0x020, | ||
88 | 'READ_DR1': 0x021, | ||
89 | 'READ_DR2': 0x022, | ||
90 | 'READ_DR3': 0x023, | ||
91 | 'READ_DR4': 0x024, | ||
92 | 'READ_DR5': 0x025, | ||
93 | 'READ_DR6': 0x026, | ||
94 | 'READ_DR7': 0x027, | ||
95 | 'WRITE_DR0': 0x030, | ||
96 | 'WRITE_DR1': 0x031, | ||
97 | 'WRITE_DR2': 0x032, | ||
98 | 'WRITE_DR3': 0x033, | ||
99 | 'WRITE_DR4': 0x034, | ||
100 | 'WRITE_DR5': 0x035, | ||
101 | 'WRITE_DR6': 0x036, | ||
102 | 'WRITE_DR7': 0x037, | ||
103 | 'EXCP_BASE': 0x040, | ||
104 | 'INTR': 0x060, | ||
105 | 'NMI': 0x061, | ||
106 | 'SMI': 0x062, | ||
107 | 'INIT': 0x063, | ||
108 | 'VINTR': 0x064, | ||
109 | 'CR0_SEL_WRITE': 0x065, | ||
110 | 'IDTR_READ': 0x066, | ||
111 | 'GDTR_READ': 0x067, | ||
112 | 'LDTR_READ': 0x068, | ||
113 | 'TR_READ': 0x069, | ||
114 | 'IDTR_WRITE': 0x06a, | ||
115 | 'GDTR_WRITE': 0x06b, | ||
116 | 'LDTR_WRITE': 0x06c, | ||
117 | 'TR_WRITE': 0x06d, | ||
118 | 'RDTSC': 0x06e, | ||
119 | 'RDPMC': 0x06f, | ||
120 | 'PUSHF': 0x070, | ||
121 | 'POPF': 0x071, | ||
122 | 'CPUID': 0x072, | ||
123 | 'RSM': 0x073, | ||
124 | 'IRET': 0x074, | ||
125 | 'SWINT': 0x075, | ||
126 | 'INVD': 0x076, | ||
127 | 'PAUSE': 0x077, | ||
128 | 'HLT': 0x078, | ||
129 | 'INVLPG': 0x079, | ||
130 | 'INVLPGA': 0x07a, | ||
131 | 'IOIO': 0x07b, | ||
132 | 'MSR': 0x07c, | ||
133 | 'TASK_SWITCH': 0x07d, | ||
134 | 'FERR_FREEZE': 0x07e, | ||
135 | 'SHUTDOWN': 0x07f, | ||
136 | 'VMRUN': 0x080, | ||
137 | 'VMMCALL': 0x081, | ||
138 | 'VMLOAD': 0x082, | ||
139 | 'VMSAVE': 0x083, | ||
140 | 'STGI': 0x084, | ||
141 | 'CLGI': 0x085, | ||
142 | 'SKINIT': 0x086, | ||
143 | 'RDTSCP': 0x087, | ||
144 | 'ICEBP': 0x088, | ||
145 | 'WBINVD': 0x089, | ||
146 | 'MONITOR': 0x08a, | ||
147 | 'MWAIT': 0x08b, | ||
148 | 'MWAIT_COND': 0x08c, | ||
149 | 'XSETBV': 0x08d, | ||
150 | 'NPF': 0x400, | ||
151 | } | ||
152 | |||
153 | # EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) | ||
154 | AARCH64_EXIT_REASONS = { | ||
155 | 'UNKNOWN': 0x00, | ||
156 | 'WFI': 0x01, | ||
157 | 'CP15_32': 0x03, | ||
158 | 'CP15_64': 0x04, | ||
159 | 'CP14_MR': 0x05, | ||
160 | 'CP14_LS': 0x06, | ||
161 | 'FP_ASIMD': 0x07, | ||
162 | 'CP10_ID': 0x08, | ||
163 | 'CP14_64': 0x0C, | ||
164 | 'ILL_ISS': 0x0E, | ||
165 | 'SVC32': 0x11, | ||
166 | 'HVC32': 0x12, | ||
167 | 'SMC32': 0x13, | ||
168 | 'SVC64': 0x15, | ||
169 | 'HVC64': 0x16, | ||
170 | 'SMC64': 0x17, | ||
171 | 'SYS64': 0x18, | ||
172 | 'IABT': 0x20, | ||
173 | 'IABT_HYP': 0x21, | ||
174 | 'PC_ALIGN': 0x22, | ||
175 | 'DABT': 0x24, | ||
176 | 'DABT_HYP': 0x25, | ||
177 | 'SP_ALIGN': 0x26, | ||
178 | 'FP_EXC32': 0x28, | ||
179 | 'FP_EXC64': 0x2C, | ||
180 | 'SERROR': 0x2F, | ||
181 | 'BREAKPT': 0x30, | ||
182 | 'BREAKPT_HYP': 0x31, | ||
183 | 'SOFTSTP': 0x32, | ||
184 | 'SOFTSTP_HYP': 0x33, | ||
185 | 'WATCHPT': 0x34, | ||
186 | 'WATCHPT_HYP': 0x35, | ||
187 | 'BKPT32': 0x38, | ||
188 | 'VECTOR32': 0x3A, | ||
189 | 'BRK64': 0x3C, | ||
190 | } | ||
191 | |||
192 | # From include/uapi/linux/kvm.h, KVM_EXIT_xxx | ||
193 | USERSPACE_EXIT_REASONS = { | ||
194 | 'UNKNOWN': 0, | ||
195 | 'EXCEPTION': 1, | ||
196 | 'IO': 2, | ||
197 | 'HYPERCALL': 3, | ||
198 | 'DEBUG': 4, | ||
199 | 'HLT': 5, | ||
200 | 'MMIO': 6, | ||
201 | 'IRQ_WINDOW_OPEN': 7, | ||
202 | 'SHUTDOWN': 8, | ||
203 | 'FAIL_ENTRY': 9, | ||
204 | 'INTR': 10, | ||
205 | 'SET_TPR': 11, | ||
206 | 'TPR_ACCESS': 12, | ||
207 | 'S390_SIEIC': 13, | ||
208 | 'S390_RESET': 14, | ||
209 | 'DCR': 15, | ||
210 | 'NMI': 16, | ||
211 | 'INTERNAL_ERROR': 17, | ||
212 | 'OSI': 18, | ||
213 | 'PAPR_HCALL': 19, | ||
214 | 'S390_UCONTROL': 20, | ||
215 | 'WATCHDOG': 21, | ||
216 | 'S390_TSCH': 22, | ||
217 | 'EPR': 23, | ||
218 | 'SYSTEM_EVENT': 24, | ||
219 | } | ||
220 | |||
221 | IOCTL_NUMBERS = { | ||
222 | 'SET_FILTER': 0x40082406, | ||
223 | 'ENABLE': 0x00002400, | ||
224 | 'DISABLE': 0x00002401, | ||
225 | 'RESET': 0x00002403, | ||
226 | } | ||
227 | |||
228 | class Arch(object): | ||
229 | """Encapsulates global architecture specific data. | ||
230 | |||
231 | Contains the performance event open syscall and ioctl numbers, as | ||
232 | well as the VM exit reasons for the architecture it runs on. | ||
233 | |||
234 | """ | ||
235 | @staticmethod | ||
236 | def get_arch(): | ||
237 | machine = os.uname()[4] | ||
238 | |||
239 | if machine.startswith('ppc'): | ||
240 | return ArchPPC() | ||
241 | elif machine.startswith('aarch64'): | ||
242 | return ArchA64() | ||
243 | elif machine.startswith('s390'): | ||
244 | return ArchS390() | ||
245 | else: | ||
246 | # X86_64 | ||
247 | for line in open('/proc/cpuinfo'): | ||
248 | if not line.startswith('flags'): | ||
249 | continue | ||
250 | |||
251 | flags = line.split() | ||
252 | if 'vmx' in flags: | ||
253 | return ArchX86(VMX_EXIT_REASONS) | ||
254 | if 'svm' in flags: | ||
255 | return ArchX86(SVM_EXIT_REASONS) | ||
256 | return | ||
257 | |||
258 | class ArchX86(Arch): | ||
259 | def __init__(self, exit_reasons): | ||
260 | self.sc_perf_evt_open = 298 | ||
261 | self.ioctl_numbers = IOCTL_NUMBERS | ||
262 | self.exit_reasons = exit_reasons | ||
263 | |||
264 | class ArchPPC(Arch): | ||
265 | def __init__(self): | ||
266 | self.sc_perf_evt_open = 319 | ||
267 | self.ioctl_numbers = IOCTL_NUMBERS | ||
268 | self.ioctl_numbers['ENABLE'] = 0x20002400 | ||
269 | self.ioctl_numbers['DISABLE'] = 0x20002401 | ||
270 | self.ioctl_numbers['RESET'] = 0x20002403 | ||
271 | |||
272 | # PPC comes in 32 and 64 bit and some generated ioctl | ||
273 | # numbers depend on the wordsize. | ||
274 | char_ptr_size = ctypes.sizeof(ctypes.c_char_p) | ||
275 | self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 | ||
276 | self.exit_reasons = {} | ||
277 | |||
278 | class ArchA64(Arch): | ||
279 | def __init__(self): | ||
280 | self.sc_perf_evt_open = 241 | ||
281 | self.ioctl_numbers = IOCTL_NUMBERS | ||
282 | self.exit_reasons = AARCH64_EXIT_REASONS | ||
283 | |||
284 | class ArchS390(Arch): | ||
285 | def __init__(self): | ||
286 | self.sc_perf_evt_open = 331 | ||
287 | self.ioctl_numbers = IOCTL_NUMBERS | ||
288 | self.exit_reasons = None | ||
289 | |||
290 | ARCH = Arch.get_arch() | ||
291 | |||
292 | |||
293 | def walkdir(path): | ||
294 | """Returns os.walk() data for specified directory. | ||
295 | |||
296 | As it is only a wrapper it returns the same 3-tuple of (dirpath, | ||
297 | dirnames, filenames). | ||
298 | """ | ||
299 | return next(os.walk(path)) | ||
300 | |||
301 | |||
302 | def parse_int_list(list_string): | ||
303 | """Returns an int list from a string of comma separated integers and | ||
304 | integer ranges.""" | ||
305 | integers = [] | ||
306 | members = list_string.split(',') | ||
307 | |||
308 | for member in members: | ||
309 | if '-' not in member: | ||
310 | integers.append(int(member)) | ||
311 | else: | ||
312 | int_range = member.split('-') | ||
313 | integers.extend(range(int(int_range[0]), | ||
314 | int(int_range[1]) + 1)) | ||
315 | |||
316 | return integers | ||
317 | |||
318 | |||
319 | def get_online_cpus(): | ||
320 | """Returns a list of cpu id integers.""" | ||
321 | with open('/sys/devices/system/cpu/online') as cpu_list: | ||
322 | cpu_string = cpu_list.readline() | ||
323 | return parse_int_list(cpu_string) | ||
324 | |||
325 | |||
326 | def get_filters(): | ||
327 | """Returns a dict of trace events, their filter ids and | ||
328 | the values that can be filtered. | ||
329 | |||
330 | Trace events can be filtered for special values by setting a | ||
331 | filter string via an ioctl. The string normally has the format | ||
332 | identifier==value. For each filter a new event will be created, to | ||
333 | be able to distinguish the events. | ||
334 | |||
335 | """ | ||
336 | filters = {} | ||
337 | filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) | ||
338 | if ARCH.exit_reasons: | ||
339 | filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) | ||
340 | return filters | ||
341 | |||
342 | libc = ctypes.CDLL('libc.so.6', use_errno=True) | ||
343 | syscall = libc.syscall | ||
344 | |||
345 | class perf_event_attr(ctypes.Structure): | ||
346 | """Struct that holds the necessary data to set up a trace event. | ||
347 | |||
348 | For an extensive explanation see perf_event_open(2) and | ||
349 | include/uapi/linux/perf_event.h, struct perf_event_attr | ||
350 | |||
351 | All fields that are not initialized in the constructor are 0. | ||
352 | |||
353 | """ | ||
354 | _fields_ = [('type', ctypes.c_uint32), | ||
355 | ('size', ctypes.c_uint32), | ||
356 | ('config', ctypes.c_uint64), | ||
357 | ('sample_freq', ctypes.c_uint64), | ||
358 | ('sample_type', ctypes.c_uint64), | ||
359 | ('read_format', ctypes.c_uint64), | ||
360 | ('flags', ctypes.c_uint64), | ||
361 | ('wakeup_events', ctypes.c_uint32), | ||
362 | ('bp_type', ctypes.c_uint32), | ||
363 | ('bp_addr', ctypes.c_uint64), | ||
364 | ('bp_len', ctypes.c_uint64), | ||
365 | ] | ||
366 | |||
367 | def __init__(self): | ||
368 | super(self.__class__, self).__init__() | ||
369 | self.type = PERF_TYPE_TRACEPOINT | ||
370 | self.size = ctypes.sizeof(self) | ||
371 | self.read_format = PERF_FORMAT_GROUP | ||
372 | |||
373 | def perf_event_open(attr, pid, cpu, group_fd, flags): | ||
374 | """Wrapper for the sys_perf_event_open() syscall. | ||
375 | |||
376 | Used to set up performance events, returns a file descriptor or -1 | ||
377 | on error. | ||
378 | |||
379 | Attributes are: | ||
380 | - syscall number | ||
381 | - struct perf_event_attr * | ||
382 | - pid or -1 to monitor all pids | ||
383 | - cpu number or -1 to monitor all cpus | ||
384 | - The file descriptor of the group leader or -1 to create a group. | ||
385 | - flags | ||
386 | |||
387 | """ | ||
388 | return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), | ||
389 | ctypes.c_int(pid), ctypes.c_int(cpu), | ||
390 | ctypes.c_int(group_fd), ctypes.c_long(flags)) | ||
391 | |||
392 | PERF_TYPE_TRACEPOINT = 2 | ||
393 | PERF_FORMAT_GROUP = 1 << 3 | ||
394 | |||
395 | PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' | ||
396 | PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' | ||
397 | |||
398 | class Group(object): | ||
399 | """Represents a perf event group.""" | ||
400 | |||
401 | def __init__(self): | ||
402 | self.events = [] | ||
403 | |||
404 | def add_event(self, event): | ||
405 | self.events.append(event) | ||
406 | |||
407 | def read(self): | ||
408 | """Returns a dict with 'event name: value' for all events in the | ||
409 | group. | ||
410 | |||
411 | Values are read by reading from the file descriptor of the | ||
412 | event that is the group leader. See perf_event_open(2) for | ||
413 | details. | ||
414 | |||
415 | Read format for the used event configuration is: | ||
416 | struct read_format { | ||
417 | u64 nr; /* The number of events */ | ||
418 | struct { | ||
419 | u64 value; /* The value of the event */ | ||
420 | } values[nr]; | ||
421 | }; | ||
422 | |||
423 | """ | ||
424 | length = 8 * (1 + len(self.events)) | ||
425 | read_format = 'xxxxxxxx' + 'Q' * len(self.events) | ||
426 | return dict(zip([event.name for event in self.events], | ||
427 | struct.unpack(read_format, | ||
428 | os.read(self.events[0].fd, length)))) | ||
429 | |||
430 | class Event(object): | ||
431 | """Represents a performance event and manages its life cycle.""" | ||
432 | def __init__(self, name, group, trace_cpu, trace_pid, trace_point, | ||
433 | trace_filter, trace_set='kvm'): | ||
434 | self.name = name | ||
435 | self.fd = None | ||
436 | self.setup_event(group, trace_cpu, trace_pid, trace_point, | ||
437 | trace_filter, trace_set) | ||
438 | |||
439 | def __del__(self): | ||
440 | """Closes the event's file descriptor. | ||
441 | |||
442 | As no python file object was created for the file descriptor, | ||
443 | python will not reference count the descriptor and will not | ||
444 | close it itself automatically, so we do it. | ||
445 | |||
446 | """ | ||
447 | if self.fd: | ||
448 | os.close(self.fd) | ||
449 | |||
450 | def setup_event_attribute(self, trace_set, trace_point): | ||
451 | """Returns an initialized ctype perf_event_attr struct.""" | ||
452 | |||
453 | id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, | ||
454 | trace_point, 'id') | ||
455 | |||
456 | event_attr = perf_event_attr() | ||
457 | event_attr.config = int(open(id_path).read()) | ||
458 | return event_attr | ||
459 | |||
460 | def setup_event(self, group, trace_cpu, trace_pid, trace_point, | ||
461 | trace_filter, trace_set): | ||
462 | """Sets up the perf event in Linux. | ||
463 | |||
464 | Issues the syscall to register the event in the kernel and | ||
465 | then sets the optional filter. | ||
466 | |||
467 | """ | ||
468 | |||
469 | event_attr = self.setup_event_attribute(trace_set, trace_point) | ||
470 | |||
471 | # First event will be group leader. | ||
472 | group_leader = -1 | ||
473 | |||
474 | # All others have to pass the leader's descriptor instead. | ||
475 | if group.events: | ||
476 | group_leader = group.events[0].fd | ||
477 | |||
478 | fd = perf_event_open(event_attr, trace_pid, | ||
479 | trace_cpu, group_leader, 0) | ||
480 | if fd == -1: | ||
481 | err = ctypes.get_errno() | ||
482 | raise OSError(err, os.strerror(err), | ||
483 | 'while calling sys_perf_event_open().') | ||
484 | |||
485 | if trace_filter: | ||
486 | fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'], | ||
487 | trace_filter) | ||
488 | |||
489 | self.fd = fd | ||
490 | |||
491 | def enable(self): | ||
492 | """Enables the trace event in the kernel. | ||
493 | |||
494 | Enabling the group leader makes reading counters from it and the | ||
495 | events under it possible. | ||
496 | |||
497 | """ | ||
498 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0) | ||
499 | |||
500 | def disable(self): | ||
501 | """Disables the trace event in the kernel. | ||
502 | |||
503 | Disabling the group leader makes reading all counters under it | ||
504 | impossible. | ||
505 | |||
506 | """ | ||
507 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0) | ||
508 | |||
509 | def reset(self): | ||
510 | """Resets the count of the trace event in the kernel.""" | ||
511 | fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) | ||
512 | |||
513 | class TracepointProvider(object): | ||
514 | """Data provider for the stats class. | ||
515 | |||
516 | Manages the events/groups from which it acquires its data. | ||
517 | |||
518 | """ | ||
519 | def __init__(self): | ||
520 | self.group_leaders = [] | ||
521 | self.filters = get_filters() | ||
522 | self._fields = self.get_available_fields() | ||
523 | self._pid = 0 | ||
524 | |||
525 | def get_available_fields(self): | ||
526 | """Returns a list of available events of format 'event name(filter | ||
527 | name)'. | ||
528 | |||
529 | All available events have directories under | ||
530 | /sys/kernel/debug/tracing/events/ which export information | ||
531 | about the specific event. Therefore, listing the dirs gives us | ||
532 | a list of all available events. | ||
533 | |||
534 | Some events like the vm exit reasons can be filtered for | ||
535 | specific values. To account for that, the routine below | ||
536 | creates special fields with the following format: | ||
537 | event name(filter name) | ||
538 | |||
539 | """ | ||
540 | path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') | ||
541 | fields = walkdir(path)[1] | ||
542 | extra = [] | ||
543 | for field in fields: | ||
544 | if field in self.filters: | ||
545 | filter_name_, filter_dicts = self.filters[field] | ||
546 | for name in filter_dicts: | ||
547 | extra.append(field + '(' + name + ')') | ||
548 | fields += extra | ||
549 | return fields | ||
550 | |||
551 | def setup_traces(self): | ||
552 | """Creates all event and group objects needed to be able to retrieve | ||
553 | data.""" | ||
554 | if self._pid > 0: | ||
555 | # Fetch list of all threads of the monitored pid, as qemu | ||
556 | # starts a thread for each vcpu. | ||
557 | path = os.path.join('/proc', str(self._pid), 'task') | ||
558 | groupids = walkdir(path)[1] | ||
559 | else: | ||
560 | groupids = get_online_cpus() | ||
561 | |||
562 | # The constant is needed as a buffer for python libs, std | ||
563 | # streams and other files that the script opens. | ||
564 | newlim = len(groupids) * len(self._fields) + 50 | ||
565 | try: | ||
566 | softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) | ||
567 | |||
568 | if hardlim < newlim: | ||
569 | # Now we need CAP_SYS_RESOURCE, to increase the hard limit. | ||
570 | resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim)) | ||
571 | else: | ||
572 | # Raising the soft limit is sufficient. | ||
573 | resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim)) | ||
574 | |||
575 | except ValueError: | ||
576 | sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim)) | ||
577 | |||
578 | for groupid in groupids: | ||
579 | group = Group() | ||
580 | for name in self._fields: | ||
581 | tracepoint = name | ||
582 | tracefilter = None | ||
583 | match = re.match(r'(.*)\((.*)\)', name) | ||
584 | if match: | ||
585 | tracepoint, sub = match.groups() | ||
586 | tracefilter = ('%s==%d\0' % | ||
587 | (self.filters[tracepoint][0], | ||
588 | self.filters[tracepoint][1][sub])) | ||
589 | |||
590 | # From perf_event_open(2): | ||
591 | # pid > 0 and cpu == -1 | ||
592 | # This measures the specified process/thread on any CPU. | ||
593 | # | ||
594 | # pid == -1 and cpu >= 0 | ||
595 | # This measures all processes/threads on the specified CPU. | ||
596 | trace_cpu = groupid if self._pid == 0 else -1 | ||
597 | trace_pid = int(groupid) if self._pid != 0 else -1 | ||
598 | |||
599 | group.add_event(Event(name=name, | ||
600 | group=group, | ||
601 | trace_cpu=trace_cpu, | ||
602 | trace_pid=trace_pid, | ||
603 | trace_point=tracepoint, | ||
604 | trace_filter=tracefilter)) | ||
605 | |||
606 | self.group_leaders.append(group) | ||
607 | |||
608 | def available_fields(self): | ||
609 | return self.get_available_fields() | ||
610 | |||
611 | @property | ||
612 | def fields(self): | ||
613 | return self._fields | ||
614 | |||
615 | @fields.setter | ||
616 | def fields(self, fields): | ||
617 | """Enables/disables the (un)wanted events""" | ||
618 | self._fields = fields | ||
619 | for group in self.group_leaders: | ||
620 | for index, event in enumerate(group.events): | ||
621 | if event.name in fields: | ||
622 | event.reset() | ||
623 | event.enable() | ||
624 | else: | ||
625 | # Do not disable the group leader. | ||
626 | # It would disable all of its events. | ||
627 | if index != 0: | ||
628 | event.disable() | ||
629 | |||
630 | @property | ||
631 | def pid(self): | ||
632 | return self._pid | ||
633 | |||
634 | @pid.setter | ||
635 | def pid(self, pid): | ||
636 | """Changes the monitored pid by setting new traces.""" | ||
637 | self._pid = pid | ||
638 | # The garbage collector will get rid of all Event/Group | ||
639 | # objects and open files after removing the references. | ||
640 | self.group_leaders = [] | ||
641 | self.setup_traces() | ||
642 | self.fields = self._fields | ||
643 | |||
644 | def read(self): | ||
645 | """Returns 'event name: current value' for all enabled events.""" | ||
646 | ret = defaultdict(int) | ||
647 | for group in self.group_leaders: | ||
648 | for name, val in group.read().iteritems(): | ||
649 | if name in self._fields: | ||
650 | ret[name] += val | ||
651 | return ret | ||
652 | |||
653 | class DebugfsProvider(object): | ||
654 | """Provides data from the files that KVM creates in the kvm debugfs | ||
655 | folder.""" | ||
656 | def __init__(self): | ||
657 | self._fields = self.get_available_fields() | ||
658 | self._pid = 0 | ||
659 | self.do_read = True | ||
660 | |||
661 | def get_available_fields(self): | ||
662 | """Returns a list of available fields. | ||
663 | |||
664 | The fields are all available KVM debugfs files | ||
665 | |||
666 | """ | ||
667 | return walkdir(PATH_DEBUGFS_KVM)[2] | ||
668 | |||
669 | @property | ||
670 | def fields(self): | ||
671 | return self._fields | ||
672 | |||
673 | @fields.setter | ||
674 | def fields(self, fields): | ||
675 | self._fields = fields | ||
676 | |||
677 | @property | ||
678 | def pid(self): | ||
679 | return self._pid | ||
680 | |||
681 | @pid.setter | ||
682 | def pid(self, pid): | ||
683 | if pid != 0: | ||
684 | self._pid = pid | ||
685 | |||
686 | vms = walkdir(PATH_DEBUGFS_KVM)[1] | ||
687 | if len(vms) == 0: | ||
688 | self.do_read = False | ||
689 | |||
690 | self.paths = filter(lambda x: "{}-".format(pid) in x, vms) | ||
691 | |||
692 | else: | ||
693 | self.paths = [''] | ||
694 | self.do_read = True | ||
695 | |||
696 | def read(self): | ||
697 | """Returns a dict with format:'file name / field -> current value'.""" | ||
698 | results = {} | ||
699 | |||
700 | # If no debugfs filtering support is available, then don't read. | ||
701 | if not self.do_read: | ||
702 | return results | ||
703 | |||
704 | for path in self.paths: | ||
705 | for field in self._fields: | ||
706 | results[field] = results.get(field, 0) \ | ||
707 | + self.read_field(field, path) | ||
708 | |||
709 | return results | ||
710 | |||
711 | def read_field(self, field, path): | ||
712 | """Returns the value of a single field from a specific VM.""" | ||
713 | try: | ||
714 | return int(open(os.path.join(PATH_DEBUGFS_KVM, | ||
715 | path, | ||
716 | field)) | ||
717 | .read()) | ||
718 | except IOError: | ||
719 | return 0 | ||
720 | |||
class Stats(object):
    """Collects and caches the values reported by the data providers.

    A pid filter and a fields filter (a regular expression) can be set
    on this object; both are handed on to every provider.

    """
    def __init__(self, providers, pid, fields=None):
        """Stores the providers and applies the initial filters.

        providers -- objects offering the attributes 'pid' and 'fields'
                     and the methods get_available_fields() and read()
        pid       -- value assigned to the 'pid' attribute of each provider
        fields    -- optional regex; only matching field names are kept

        """
        self.providers = providers
        self._pid_filter = pid
        self._fields_filter = fields
        self.values = {}
        self.update_provider_pid()
        self.update_provider_filters()

    def update_provider_filters(self):
        """Hands the field selection on to every provider.

        Without a fields filter all available fields of a provider are
        selected, otherwise only those whose name matches the regex
        (anchored at the start, as done by re.match).

        """
        pattern = self._fields_filter

        # Changing the selected fields resets the counters, hence the
        # previously cached values are of no use any more.
        self.values = {}
        for provider in self.providers:
            selected = []
            for name in provider.get_available_fields():
                if not pattern or re.match(pattern, name) is not None:
                    selected.append(name)
            provider.fields = selected

    def update_provider_pid(self):
        """Hands the pid filter on to every provider."""
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def fields_filter(self):
        """The regex currently used to select fields (or None)."""
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        self._fields_filter = fields_filter
        self.update_provider_filters()

    @property
    def pid_filter(self):
        """The pid currently handed to the providers."""
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        self._pid_filter = pid
        # Values gathered for the previous pid must not leak into the
        # deltas computed for the new one.
        self.values = {}
        self.update_provider_pid()

    def get(self):
        """Reads all providers and returns the updated value cache.

        The result maps field -> (value, delta to the last value).  A
        field seen for the first time is compared against 0; a field
        missing from a provider's reading counts as 0.

        """
        for provider in self.providers:
            reading = provider.read()
            for name in provider.fields:
                previous = self.values.get(name, (0, 0))[0]
                current = reading.get(name, 0)
                self.values[name] = (current, current - previous)
        return self.values
788 | |||
# Widths (in characters) of the event name column and of the total count
# column drawn by Tui.refresh().
LABEL_WIDTH = 40
NUMBER_WIDTH = 10

class Tui(object):
    """Text user interface drawing the statistics with curses."""
    def __init__(self, stats):
        """Remembers the stats object and sets the initial fields filter.

        The screen only becomes available once the object is entered as
        a context manager.

        """
        self.stats = stats
        self.screen = None
        self.drilldown = False
        self.update_drilldown()

    def __enter__(self):
        """Prepares the terminal for curses.  Modelled on the
        curses.wrapper implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The curses module is a bit over-conscientious here: an error
        # returned by the C start_color() can safely be ignored, hence
        # whatever is raised gets dropped.
        try:
            curses.start_color()
        except:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Puts the terminal back into its normal state.  Modelled on
        the curses.wrapper implementation from the Python standard
        library."""
        if not self.screen:
            return
        self.screen.keypad(0)
        curses.echo()
        curses.nocbreak()
        curses.endwin()

    def update_drilldown(self):
        """Toggles the filter that only lets fields without braces pass.

        A fields filter other than that one is left untouched.

        """
        active = self.stats.fields_filter
        if not active:
            self.stats.fields_filter = r'^[^\(]*$'
        elif active == r'^[^\(]*$':
            self.stats.fields_filter = None

    def update_pid(self, pid):
        """Hands the selected pid on to the stats object."""
        self.stats.pid_filter = pid

    def refresh(self, sleeptime):
        """Redraws the whole screen with freshly read statistics.

        sleeptime -- divisor applied to each delta before it is shown in
                     the 'Current' column

        """
        screen = self.screen
        screen.erase()

        pid = self.stats.pid_filter
        if pid > 0:
            title = 'kvm statistics - pid {0}'.format(pid)
        else:
            title = 'kvm statistics - summary'
        screen.addstr(0, 0, title, curses.A_BOLD)

        # Column headers; 'Total' and 'Current' are right aligned with
        # the numbers printed below them.
        numbers_end = 1 + LABEL_WIDTH + NUMBER_WIDTH
        screen.addstr(2, 1, 'Event')
        screen.addstr(2, numbers_end - len('Total'), 'Total')
        screen.addstr(2, numbers_end + 8 - len('Current'), 'Current')

        stats = self.stats.get()

        def by_activity(name):
            # Largest delta first, ties resolved by the largest total.
            total, delta = stats[name]
            return (-delta if delta else 0, -total)

        ordered = sorted(stats.keys(), key=by_activity)
        for row, name in enumerate(ordered, 3):
            if row >= screen.getmaxyx()[0]:
                break
            total, delta = stats[name]
            # Stop at the first entry without any count at all.
            if not total and not delta:
                break
            screen.addstr(row, 1, name)
            screen.addstr(row, 1 + LABEL_WIDTH, '%10d' % (total,))
            if delta is not None:
                screen.addstr(row, 1 + LABEL_WIDTH + NUMBER_WIDTH,
                              '%8d' % (delta / sleeptime,))
        screen.refresh()

    def show_filter_selection(self):
        """Draws the mask used to enter a fields filter.

        Keeps asking until a valid regex, which then becomes the new
        fields filter, or nothing at all has been entered.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            current = "Current regex: {0}".format(self.stats.fields_filter)
            self.screen.addstr(2, 0, current)
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr()
            curses.noecho()
            if not regex:
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                return
            except re.error:
                continue

    def show_vm_selection(self):
        """Draws the mask used to enter a pid.

        Keeps asking until 0 or a pid with a directory below /proc has
        been entered.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')

            curses.echo()
            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
            entered = self.screen.getstr()
            curses.noecho()

            try:
                pid = int(entered)
                if pid != 0 and not os.path.isdir(os.path.join('/proc/',
                                                               str(pid))):
                    continue
                self.update_pid(pid)
                break
            except ValueError:
                continue

    def show_stats(self):
        """Redraws the screen periodically and reacts to key presses.

        Keys: 'x' toggles the drilldown filter, 'f' asks for a fields
        filter, 'p' asks for a pid and 'q' leaves the loop.

        """
        sleeptime = 0.25
        while True:
            self.refresh(sleeptime)
            curses.halfdelay(int(sleeptime * 10))
            # Only the very first refresh uses the short interval.
            sleeptime = 3
            try:
                char = self.screen.getkey()
                if char == 'q':
                    break
                elif char == 'x':
                    self.drilldown = not self.drilldown
                    self.update_drilldown()
                elif char == 'f':
                    self.show_filter_selection()
                elif char == 'p':
                    self.show_vm_selection()
            except KeyboardInterrupt:
                break
            except curses.error:
                continue
962 | |||
def batch(stats):
    """Prints one line per field: name, value and delta.

    The statistics are read twice with a pause of one second in between;
    only the second reading is printed, sorted by field name.

    """
    # The result of the first reading is discarded, it merely provides
    # the values the second reading is compared against.
    stats.get()
    time.sleep(1)
    snapshot = stats.get()
    for name in sorted(snapshot.keys()):
        total, delta = snapshot[name]
        print('%-42s%10d%10d' % (name, total, delta))
971 | |||
def log(stats):
    """Prints statistics as reiterating key block, multiple value blocks.

    The set of fields is determined once from an initial reading.  After
    that one line with the deltas of all fields is printed every second,
    preceded by a line with the field names before the first and then
    before every 20th line of values.  The function does not return on
    its own.

    """
    # Iterating a dict yields its keys, no need for iterkeys().
    keys = sorted(stats.get())

    def banner():
        # One line of space separated field names.  Printing the whole
        # line at once makes sure it is terminated by a newline; a series
        # of trailing-comma prints would glue all output into one line.
        print(' '.join(['%s' % k for k in keys]))

    def statline():
        # One line with the current delta of every field, in the same
        # order as the names in the banner.
        s = stats.get()
        print(' '.join([' %9d' % s[k][1] for k in keys]))

    line = 0
    banner_repeat = 20
    while True:
        time.sleep(1)
        if line % banner_repeat == 0:
            banner()
        statline()
        line += 1
992 | |||
def get_options():
    """Parses the command line and returns the resulting options.

    The returned optparse values object carries the attributes 'once',
    'log', 'tracepoints', 'debugfs' (booleans, False by default),
    'fields' (regex string or None) and 'pid' (int, 0 by default).

    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    /sys/kernel/debug/kvm
    /sys/kernel/debug/trace/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.
"""

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Help formatter that prints the description as it is instead
        of re-wrapping it."""
        def format_description(self, description):
            return description + "\n" if description else ""

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())

    # All plain on/off switches: (option strings, destination, help).
    switches = (
        (('-1', '--once', '--batch'), 'once',
         'run in batch mode for one second'),
        (('-l', '--log'), 'log',
         'run in logging mode (like vmstat)'),
        (('-t', '--tracepoints'), 'tracepoints',
         'retrieve statistics from tracepoints'),
        (('-d', '--debugfs'), 'debugfs',
         'retrieve statistics from debugfs'),
    )
    for names, dest, help_text in switches:
        optparser.add_option(*names,
                             action='store_true',
                             default=False,
                             dest=dest,
                             help=help_text)

    optparser.add_option('-f', '--fields',
                         action='store',
                         default=None,
                         dest='fields',
                         help='fields to display (regex)')
    optparser.add_option('-p', '--pid',
                         action='store',
                         default=0,
                         type=int,
                         dest='pid',
                         help='restrict statistics to pid')

    # Positional arguments (which include the program name, as the whole
    # of sys.argv is parsed) are of no interest.
    options = optparser.parse_args(sys.argv)[0]
    return options
1062 | |||
def get_providers(options):
    """Creates the data providers selected by the passed options.

    A TracepointProvider is created for options.tracepoints and a
    DebugfsProvider for options.debugfs, in that order.  With neither
    option set, a single TracepointProvider is used.

    """
    selected = []

    if options.tracepoints:
        selected.append(TracepointProvider())
    if options.debugfs:
        selected.append(DebugfsProvider())

    # Fall back to tracepoints if nothing was asked for explicitly.
    if not selected:
        selected.append(TracepointProvider())

    return selected
1075 | |||
def check_access(options):
    """Exits if the current user can't access all needed directories.

    Checks for /sys/kernel/debug, PATH_DEBUGFS_KVM and, if tracepoints
    are to be used (requested with -t or by default when -d is not
    given), PATH_DEBUGFS_TRACING.

    If the tracing directory is missing and tracepoints were merely the
    default, debugfs statistics are used instead: options.debugfs is set
    and, after a pause of five seconds that leaves time to read the
    message, the modified options are returned.

    Returns the (possibly modified) options; exits with status 1 if a
    required directory is missing.

    """
    if not os.path.exists('/sys/kernel/debug'):
        # Terminated by a newline like all other messages, otherwise the
        # shell prompt ends up on the same line.
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints
                                                     or not options.debugfs):
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        # Tracepoints were asked for explicitly, so there is nothing to
        # fall back to.
        if options.tracepoints:
            sys.exit(1)

        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        # Called through the time module like everywhere else in this
        # file, so no separate import of the bare name is needed.
        time.sleep(5)

    return options
1104 | |||
def main():
    """Entry point: evaluates the options and starts the chosen mode.

    The options are parsed and checked first.  A pid greater than 0
    without a directory below /proc ends the program with an error
    message.  Then logging mode (--log), batch mode (--once) or, if
    neither was requested, the curses interface is run.

    """
    options = check_access(get_options())

    if options.pid > 0:
        proc_dir = os.path.join('/proc/', str(options.pid))
        if not os.path.isdir(proc_dir):
            sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
            sys.exit('Specified pid does not exist.')

    stats = Stats(get_providers(options), options.pid, fields=options.fields)

    # --log takes precedence over --once.
    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()

if __name__ == "__main__":
    main()
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt new file mode 100644 index 000000000000..b92a153d7115 --- /dev/null +++ b/tools/kvm/kvm_stat/kvm_stat.txt | |||
@@ -0,0 +1,63 @@ | |||
1 | kvm_stat(1) | ||
2 | =========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | kvm_stat - Report KVM kernel module event counters | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'kvm_stat' [OPTION]... | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | kvm_stat prints counts of KVM kernel module trace events. These events signify | ||
16 | state transitions such as guest mode entry and exit. | ||
17 | |||
18 | This tool is useful for observing guest behavior from the host perspective. | ||
19 | Often conclusions about performance or buggy behavior can be drawn from the | ||
20 | output. | ||
21 | |||
22 | The set of KVM kernel module trace events may be specific to the kernel version | ||
23 | or architecture. It is best to check the KVM kernel module source code for the | ||
24 | meaning of events. | ||
25 | |||
26 | OPTIONS | ||
27 | ------- | ||
28 | -1:: | ||
29 | --once:: | ||
30 | --batch:: | ||
31 | run in batch mode for one second | ||
32 | |||
33 | -l:: | ||
34 | --log:: | ||
35 | run in logging mode (like vmstat) | ||
36 | |||
37 | -t:: | ||
38 | --tracepoints:: | ||
39 | retrieve statistics from tracepoints | ||
40 | |||
41 | -d:: | ||
42 | --debugfs:: | ||
43 | retrieve statistics from debugfs | ||
44 | |||
45 | -p<pid>:: | ||
46 | --pid=<pid>:: | ||
47 | limit statistics to one virtual machine (pid) | ||
48 | |||
49 | -f<fields>:: | ||
50 | --fields=<fields>:: | ||
51 | fields to display (regex) | ||
52 | |||
53 | -h:: | ||
54 | --help:: | ||
55 | show help message | ||
56 | |||
57 | SEE ALSO | ||
58 | -------- | ||
59 | 'perf'(1), 'trace-cmd'(1) | ||
60 | |||
61 | AUTHOR | ||
62 | ------ | ||
63 | Stefan Hajnoczi <stefanha@redhat.com> | ||