aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf/scripts/python/compaction-times.py
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-08-31 22:49:05 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-08-31 22:49:05 -0400
commit41d859a83c567a9c9f50a34082cc64aab0abb0cd (patch)
treeab911ea521701401413d041e1b92225f3dbdab41 /tools/perf/scripts/python/compaction-times.py
parent4658000955d1864b54890214434e171949c7f1c5 (diff)
parentbac2e4a96d1c0bcce5e9654dcc902f75576b9b03 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Main perf kernel side changes: - uprobes updates/fixes. (Oleg Nesterov) - Add PERF_RECORD_SWITCH to indicate context switches and use it in tooling. (Adrian Hunter) - Support BPF programs attached to uprobes and first steps for BPF tooling support. (Wang Nan) - x86 generic x86 MSR-to-perf PMU driver. (Andy Lutomirski) - x86 Intel PT, LBR and BTS updates. (Alexander Shishkin) - x86 Intel Skylake support. (Andi Kleen) - x86 Intel Knights Landing (KNL) RAPL support. (Dasaratharaman Chandramouli) - x86 Intel Broadwell-DE uncore support. (Kan Liang) - x86 hw breakpoints robustization (Andy Lutomirski) Main perf tooling side changes: - Support Intel PT in several tools, enabling the use of the processor trace feature introduced in Intel Broadwell processors: (Adrian Hunter) # dmesg | grep Performance # [0.188477] Performance Events: PEBS fmt2+, 16-deep LBR, Broadwell events, full-width counters, Intel PMU driver. # perf record -e intel_pt//u -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.216 MB perf.data ] # perf script # then navigate in the tool output to some area, like this one: 184 1030 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba661440 dl_main (/usr/lib64/ld-2.17.so) 185 1457 dl_main (/usr/lib64/ld-2.17.so) => 7f21ba669f10 _dl_new_object (/usr/lib64/ld-2.17.so) 186 9f37 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba677b90 strlen (/usr/lib64/ld-2.17.so) 187 7ba3 strlen (/usr/lib64/ld-2.17.so) => 7f21ba677c75 strlen (/usr/lib64/ld-2.17.so) 188 7c78 strlen (/usr/lib64/ld-2.17.so) => 7f21ba669f3c _dl_new_object (/usr/lib64/ld-2.17.so) 189 9f8a _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba65fab0 calloc@plt (/usr/lib64/ld-2.17.so) 190 fab0 calloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e70 calloc (/usr/lib64/ld-2.17.so) 191 5e87 calloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa90 malloc@plt (/usr/lib64/ld-2.17.so) 192 fa90 malloc@plt (/usr/lib64/ld-2.17.so) => 7f21ba675e60 malloc 
(/usr/lib64/ld-2.17.so) 193 5e68 malloc (/usr/lib64/ld-2.17.so) => 7f21ba65fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) 194 fa80 __libc_memalign@plt (/usr/lib64/ld-2.17.so) => 7f21ba675d50 __libc_memalign (/usr/lib64/ld-2.17.so) 195 5d63 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e20 __libc_memalign (/usr/lib64/ld-2.17.so) 196 5e40 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675d73 __libc_memalign (/usr/lib64/ld-2.17.so) 197 5d97 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675e18 __libc_memalign (/usr/lib64/ld-2.17.so) 198 5e1e __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba675df9 __libc_memalign (/usr/lib64/ld-2.17.so) 199 5e10 __libc_memalign (/usr/lib64/ld-2.17.so) => 7f21ba669f8f _dl_new_object (/usr/lib64/ld-2.17.so) 200 9fc2 _dl_new_object (/usr/lib64/ld-2.17.so) => 7f21ba678e70 memcpy (/usr/lib64/ld-2.17.so) 201 8e8c memcpy (/usr/lib64/ld-2.17.so) => 7f21ba678ea0 memcpy (/usr/lib64/ld-2.17.so) - Add support for using several Intel PT features (CYC, MTC packets), the relevant documentation was updated in: tools/perf/Documentation/intel-pt.txt briefly describing those packets, its purposes, how to configure them in the event config terms and relevant external documentation for further reading. (Adrian Hunter) - Introduce support for probing at an absolute address, for user and kernel 'perf probe's, useful when one have the symbol maps on a developer machine but not on an embedded system. (Wang Nan) - Add Intel BTS support, with a call-graph script to show it and PT in use in a GUI using 'perf script' python scripting with postgresql and Qt. (Adrian Hunter) - Allow selecting the type of callchains per event, including disabling callchains in all but one entry in an event list, to save space, and also to ask for the callchains collected in one event to be used in other events. (Kan Liang) - Beautify more syscall arguments in 'perf trace': (Arnaldo Carvalho de Melo) * A bunch more translate file/pathnames from pointers to strings. 
* Convert numbers to strings for the 'keyctl' syscall 'option' arg. * Add missing 'clockid' entries. - Introduce 'srcfile' sort key: (Andi Kleen) # perf record -F 10000 usleep 1 # perf report --stdio --dsos '[kernel.vmlinux]' -s srcfile <SNIP> # Overhead Source File 26.49% copy_page_64.S 5.49% signal.c 0.51% msr.h # It can be combined with other fields, for instance, experiment with '-s srcfile,symbol'. There are some oddities in some distros and with some specific DSOs, being investigated, so your mileage may vary. - Support per-event 'freq' term: (Namhyung Kim) $ perf record -e 'cpu/instructions,freq=1234/',cycles -c 1000 sleep 1 $ perf evlist -F cpu/instructions,freq=1234/: sample_freq=1234 cycles: sample_period=1000 $ - Deref sys_enter pointer args with contents from probe:vfs_getname, showing pathnames instead of pointers in many syscalls in 'perf trace'. (Arnaldo Carvalho de Melo) - Stop collecting /proc/kallsyms in perf.data files, saving about 4.5MB on a typical x86-64 system, use the symbol resolution routines used in all the other tools (report, top, etc) now that we can ask libtraceevent to use perf's symbol resolution code. (Arnaldo Carvalho de Melo) - Allow filtering out of perf's PID via 'perf record --exclude-perf'. (Wang Nan) - 'perf trace' now supports syscall groups, like strace, i.e: $ trace -e file touch file Will expand 'file' into multiple, file related, syscalls. More work needed to add extra groups for other syscall groups, and also to complement what was added for the 'file' group, included as a proof of concept. (Arnaldo Carvalho de Melo) - Add lock_pi stresser to 'perf bench futex', to test the kernel code related to FUTEX_(UN)LOCK_PI. (Davidlohr Bueso) - Let user have timestamps with per-thread recording in 'perf record' (Adrian Hunter) - ... 
and tons of other changes, see the shortlog and the Git log for details" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (240 commits) perf evlist: Add backpointer for perf_env to evlist perf tools: Rename perf_session_env to perf_env perf tools: Do not change lib/api/fs/debugfs directly perf tools: Add tracing_path and remove unneeded functions perf buildid: Introduce sysfs/filename__sprintf_build_id perf evsel: Add a backpointer to the evlist a evsel is in perf trace: Add header with copyright and background info perf scripts python: Add new compaction-times script perf stat: Get correct cpu id for print_aggr tools lib traceeveent: Allow for negative numbers in print format perf script: Add --[no-]-demangle/--[no-]-demangle-kernel tracing/uprobes: Do not print '0x (null)' when offset is 0 perf probe: Support probing at absolute address perf probe: Fix error reported when offset without function perf probe: Fix list result when address is zero perf probe: Fix list result when symbol can't be found tools build: Allow duplicate objects in the object list perf tools: Remove export.h from MANIFEST perf probe: Prevent segfault when reading probe point with absolute address perf tools: Update Intel PT documentation ...
Diffstat (limited to 'tools/perf/scripts/python/compaction-times.py')
-rw-r--r--tools/perf/scripts/python/compaction-times.py311
1 files changed, 311 insertions, 0 deletions
diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py
new file mode 100644
index 000000000000..239cb0568ec3
--- /dev/null
+++ b/tools/perf/scripts/python/compaction-times.py
@@ -0,0 +1,311 @@
# report time spent in compaction
# Licensed under the terms of the GNU GPL License version 2

# testing:
# 'echo 1 > /proc/sys/vm/compact_memory' to force compaction of all zones

import os
import sys
import re

import signal
# Restore the default SIGPIPE action so that a closed output pipe (for
# example when the report is piped into a pager that exits early) ends the
# script instead of surfacing as a Python write error.
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

# Command-line synopsis; printed by pr_help() and on invalid options.
usage = "usage: perf script report compaction-times.py -- [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]\n"
15
class popt:
    """Per-process display modes, selected with the -p / -pv options."""
    DISP_DFL = 0            # default: only the grand total is printed
    DISP_PROC = 1           # -p: additionally one summary line per process
    DISP_PROC_VERBOSE = 2   # -pv: as -p, plus one line per compaction
20
class topt:
    """Bit flags choosing which statistics follow the time in each line."""
    DISP_TIME = 0       # -t: time only, no statistics sections
    DISP_MIG = 1        # -m: migration statistics
    DISP_ISOLFREE = 2   # -fs: free scanner statistics
    DISP_ISOLMIG = 4    # -ms: migration scanner statistics
    DISP_ALL = 7        # default: all three statistics sections
27
class comm_filter:
    """Process filter that selects processes by command name."""

    def __init__(self, re):
        # 're' is a compiled pattern object (anything with a search()
        # method); the parameter name hides the re module only inside this
        # constructor.
        self.re = re

    def filter(self, pid, comm):
        """Return True when the process must be excluded from the report.

        pid is accepted for interface parity with pid_filter and is not
        used.  A process is excluded when the pattern does not match comm
        at all, or when it only produces an empty match.
        """
        m = self.re.search(comm)
        return m is None or m.group() == ""
35
class pid_filter:
    """Process filter that selects processes by an inclusive pid range."""

    def __init__(self, low, high):
        """Build the filter from the textual range bounds.

        low and high are digit strings; an empty string means the range is
        open on that side.  The upper bound is kept as None when open, so
        that a literal "0" is a real bound rather than being mistaken for
        "no upper bound".
        """
        self.low = 0 if low == "" else int(low)
        self.high = None if high == "" else int(high)

    def filter(self, pid, comm):
        """Return True when pid lies outside the range (process excluded).

        comm is accepted for interface parity with comm_filter and is not
        used.
        """
        in_range = pid >= self.low and (self.high is None or pid <= self.high)
        return not in_range
43
def set_type(t):
    """Record a statistics-selection option in the global opt_disp.

    While opt_disp still equals topt.DISP_ALL, the given flag replaces it;
    otherwise the flag is OR-ed into the current selection.
    """
    global opt_disp
    if opt_disp == topt.DISP_ALL:
        opt_disp = t
    else:
        opt_disp = opt_disp | t
47
def ns(sec, nsec):
    """Return the timestamp (sec, nsec) as a single nanosecond count."""
    return (sec * 1000000000) + nsec
50
def time(ns):
    """Format a duration given in nanoseconds for display.

    With the global opt_ns set the value is printed as "<n>ns"; otherwise
    it is rounded to the nearest thousand and printed in microseconds as
    "<n>us".
    """
    if opt_ns:
        return "%dns" % ns
    return "%dus" % (round(ns, -3) / 1000)
53
class pair:
    """Two labelled counters that are accumulated and printed together."""

    def __init__(self, aval, bval, alabel = None, blabel = None):
        self.alabel = alabel
        self.blabel = blabel
        self.aval = aval
        self.bval = bval

    def __iadd__(self, rhs):
        """Accumulate rhs into this pair in place (used by '+=')."""
        self.aval += rhs.aval
        self.bval += rhs.bval
        return self

    def __add__(self, rhs):
        """Return a new pair holding the sums; neither operand is modified.

        The result carries the labels of the left operand.
        """
        return pair(self.aval + rhs.aval, self.bval + rhs.bval,
                    self.alabel, self.blabel)

    def __str__(self):
        return "%s=%d %s=%d" % (self.alabel, self.aval, self.blabel, self.bval)
68
class cnode:
    """Statistics of one compaction run, or the sum of several runs.

    Attributes:
        ns        while a run is in progress, its start timestamp in
                  nanoseconds; after complete(), the elapsed nanoseconds
        migrated  pair of "moved" / "failed" counters
        fscan     pair of "scanned" / "isolated" counters (free scanner)
        mscan     pair of "scanned" / "isolated" counters (migration scanner)
    """

    def __init__(self, ns):
        self.ns = ns
        self.migrated = pair(0, 0, "moved", "failed")
        self.fscan = pair(0, 0, "scanned", "isolated")
        self.mscan = pair(0, 0, "scanned", "isolated")

    def __iadd__(self, rhs):
        """Accumulate rhs into this node in place (used by '+=')."""
        self.ns += rhs.ns
        self.migrated += rhs.migrated
        self.fscan += rhs.fscan
        self.mscan += rhs.mscan
        return self

    def __add__(self, rhs):
        """Return a new node holding the sums; neither operand is modified."""
        total = cnode(0)
        total += self
        total += rhs
        return total

    def __str__(self):
        # One section per statistics flag enabled in the global opt_disp.
        parts = []
        if opt_disp & topt.DISP_MIG:
            parts.append("migration: %s" % self.migrated)
        if opt_disp & topt.DISP_ISOLFREE:
            parts.append("free_scanner: %s" % self.fscan)
        if opt_disp & topt.DISP_ISOLMIG:
            parts.append("migration_scanner: %s" % self.mscan)
        # The time is always followed by a single space, even when no
        # statistics section is selected.
        return "%s %s" % (time(self.ns), " ".join(parts))

    def complete(self, secs, nsecs):
        """Turn the stored start timestamp into the elapsed nanoseconds."""
        self.ns = ns(secs, nsecs) - self.ns

    def increment(self, migrated, fscan, mscan):
        """Add the given pairs to the counters; None arguments are skipped."""
        if migrated is not None:
            self.migrated += migrated
        if fscan is not None:
            self.fscan += fscan
        if mscan is not None:
            self.mscan += mscan
106
107
class chead:
    """Compaction accounting for one process, plus the class-wide registry.

    Class attributes:
        heads  dict mapping pid to the chead created for that pid
        val    cnode summing every completed compaction of unfiltered
               processes
        fobj   optional filter object exposing filter(pid, comm); None
               means no filtering

    Instance attributes:
        comm, pid  identity of the process
        val        cnode summing this process' completed compactions
        pending    cnode of the compaction in progress, or None
        filtered   true when the process is excluded by the filter
        list       completed compactions, kept only in verbose mode
    """
    heads = {}
    val = cnode(0)
    fobj = None

    @classmethod
    def add_filter(cls, filter):
        """Install the process filter consulted when a new pid is seen."""
        cls.fobj = filter

    @classmethod
    def _active_head(cls, pid):
        """Return the head of pid if it has a compaction in progress.

        Returns None silently for a filtered process.  Returns None after
        writing a warning to stderr when the pid has no compaction in
        progress, including a pid for which no begin event was ever seen.
        """
        head = cls.heads.get(pid)
        if head is not None and head.is_filtered():
            return None
        if head is None or not head.is_pending():
            sys.stderr.write("missing start compaction event for pid %d\n" % pid)
            return None
        return head

    @classmethod
    def create_pending(cls, pid, comm, start_secs, start_nsecs):
        """Start accounting a compaction of pid that began at the given time.

        The filter decision is taken once, when the pid is first seen, and
        is stored on the head created for it.
        """
        head = cls.heads.get(pid)
        if head is None:
            filtered = 0
            if cls.fobj is not None:
                filtered = cls.fobj.filter(pid, comm)
            head = cls.heads[pid] = chead(comm, pid, filtered)

        if not head.is_filtered():
            head.mark_pending(start_secs, start_nsecs)

    @classmethod
    def increment_pending(cls, pid, migrated, fscan, mscan):
        """Add counters to the compaction in progress for pid."""
        head = cls._active_head(pid)
        if head is not None:
            head.do_increment(migrated, fscan, mscan)

    @classmethod
    def complete_pending(cls, pid, secs, nsecs):
        """Finish the compaction in progress for pid at the given time."""
        head = cls._active_head(pid)
        if head is not None:
            head.make_complete(secs, nsecs)

    @classmethod
    def gen(cls):
        """Yield every head, but only when a per-process mode is active."""
        if opt_proc != popt.DISP_DFL:
            for i in cls.heads:
                yield cls.heads[i]

    @classmethod
    def str(cls):
        """Return the grand-total cnode (formatted by the caller via %s)."""
        return cls.val

    def __init__(self, comm, pid, filtered):
        self.comm = comm
        self.pid = pid
        self.val = cnode(0)
        self.pending = None
        self.filtered = filtered
        self.list = []

    def __add__(self, rhs):
        """Accumulate the totals of rhs into this head and return self."""
        # A head has no 'ns' attribute of its own; the time is part of val.
        self.val += rhs.val
        return self

    def mark_pending(self, secs, nsecs):
        """Open a new in-progress compaction starting at the given time."""
        self.pending = cnode(ns(secs, nsecs))

    def do_increment(self, migrated, fscan, mscan):
        """Forward counters to the in-progress compaction."""
        self.pending.increment(migrated, fscan, mscan)

    def make_complete(self, secs, nsecs):
        """Close the in-progress compaction and fold it into the totals."""
        self.pending.complete(secs, nsecs)
        chead.val += self.pending

        # Per-process sums are only maintained when they will be displayed.
        if opt_proc != popt.DISP_DFL:
            self.val += self.pending

            if opt_proc == popt.DISP_PROC_VERBOSE:
                self.list.append(self.pending)
        self.pending = None

    def enumerate(self):
        """In verbose mode, print one numbered line per completed compaction."""
        if opt_proc == popt.DISP_PROC_VERBOSE and not self.is_filtered():
            for i, pelem in enumerate(self.list):
                sys.stdout.write("%d[%s].%d: %s\n" % (self.pid, self.comm, i+1, pelem))

    def is_pending(self):
        return self.pending is not None

    def is_filtered(self):
        return self.filtered

    def display(self):
        """Print the summary line of this process unless it is filtered."""
        if not self.is_filtered():
            sys.stdout.write("%d[%s]: %s\n" % (self.pid, self.comm, self.val))
203
204
def trace_end():
    """Print the report: the grand total, then the per-process lines.

    chead.gen() yields nothing in the default display mode, so only the
    total line is printed in that case.
    """
    sys.stdout.write("total: %s\n" % chead.str())
    for head in chead.gen():
        head.display()
        head.enumerate()
210
def compaction__mm_compaction_migratepages(event_name, context, common_cpu,
        common_secs, common_nsecs, common_pid, common_comm,
        common_callchain, nr_migrated, nr_failed):
    """Callback for the mm_compaction_migratepages trace event.

    Adds nr_migrated / nr_failed to the migration counters of the
    compaction in progress for common_pid.  The other arguments are not
    used.
    """
    chead.increment_pending(common_pid,
        pair(nr_migrated, nr_failed), None, None)
217
def compaction__mm_compaction_isolate_freepages(event_name, context, common_cpu,
        common_secs, common_nsecs, common_pid, common_comm,
        common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
    """Callback for the mm_compaction_isolate_freepages trace event.

    Adds nr_scanned / nr_taken to the free scanner counters of the
    compaction in progress for common_pid.  The other arguments are not
    used.
    """
    chead.increment_pending(common_pid,
        None, pair(nr_scanned, nr_taken), None)
224
def compaction__mm_compaction_isolate_migratepages(event_name, context, common_cpu,
        common_secs, common_nsecs, common_pid, common_comm,
        common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
    """Callback for the mm_compaction_isolate_migratepages trace event.

    Adds nr_scanned / nr_taken to the migration scanner counters of the
    compaction in progress for common_pid.  The other arguments are not
    used.
    """
    chead.increment_pending(common_pid,
        None, None, pair(nr_scanned, nr_taken))
231
def compaction__mm_compaction_end(event_name, context, common_cpu,
        common_secs, common_nsecs, common_pid, common_comm,
        common_callchain, zone_start, migrate_start, free_start, zone_end,
        sync, status):
    """Callback for the mm_compaction_end trace event.

    Closes the compaction in progress for common_pid, using the event
    timestamp as its end time.  The other arguments are not used.
    """
    chead.complete_pending(common_pid, common_secs, common_nsecs)
238
def compaction__mm_compaction_begin(event_name, context, common_cpu,
        common_secs, common_nsecs, common_pid, common_comm,
        common_callchain, zone_start, migrate_start, free_start, zone_end,
        sync):
    """Callback for the mm_compaction_begin trace event.

    Opens a compaction for common_pid / common_comm, using the event
    timestamp as its start time.  The other arguments are not used.
    """
    chead.create_pending(common_pid, common_comm, common_secs, common_nsecs)
245
def pr_help():
    """Write the usage synopsis and the option summary to stdout."""
    sys.stdout.write(usage)
    sys.stdout.write("\n")
    option_lines = (
        "-h display this help\n",
        "-p display by process\n",
        "-pv display by process (verbose)\n",
        "-t display stall times only\n",
        "-m display stats for migration\n",
        "-fs display stats for free scanner\n",
        "-ms display stats for migration scanner\n",
        "-u display results in microseconds (default nanoseconds)\n",
    )
    for line in option_lines:
        sys.stdout.write(line)
259
260
comm_re = None
pid_re = None
# Matches a pid range "low-high" (either bound may be empty) or a single pid.
# Raw string so that "\d" reaches the regex engine as written.
pid_regex = r"^(\d*)-(\d*)$|^(\d*)$"

# Defaults: totals only, all statistics sections, times in nanoseconds.
opt_proc = popt.DISP_DFL
opt_disp = topt.DISP_ALL

opt_ns = True

argc = len(sys.argv) - 1
if argc >= 1:
    pid_re = re.compile(pid_regex)

    for i, opt in enumerate(sys.argv[1:]):
        # startswith() instead of opt[0] so that an empty argument cannot
        # raise IndexError.
        if opt.startswith("-"):
            if opt == "-h":
                pr_help()
                sys.exit(0)
            elif opt == "-p":
                opt_proc = popt.DISP_PROC
            elif opt == "-pv":
                opt_proc = popt.DISP_PROC_VERBOSE
            elif opt == '-u':
                opt_ns = False
            elif opt == "-t":
                set_type(topt.DISP_TIME)
            elif opt == "-m":
                set_type(topt.DISP_MIG)
            elif opt == "-fs":
                set_type(topt.DISP_ISOLFREE)
            elif opt == "-ms":
                set_type(topt.DISP_ISOLMIG)
            else:
                sys.exit(usage)

        # Only the last argument is interpreted as a process filter.
        elif i == argc - 1:
            m = pid_re.search(opt)
            if m is not None and m.group() != "":
                if m.group(3) is not None:
                    # Single pid: range with identical bounds.
                    f = pid_filter(m.group(3), m.group(3))
                else:
                    f = pid_filter(m.group(1), m.group(2))
            else:
                # Anything that is not a pid or pid range is a comm regex.
                try:
                    comm_re = re.compile(opt)
                except re.error:
                    sys.stderr.write("invalid regex '%s'\n" % opt)
                    sys.exit(usage)
                f = comm_filter(comm_re)

            chead.add_filter(f)