diff options
Diffstat (limited to 'Documentation/trace')
-rw-r--r-- | Documentation/trace/ftrace-design.txt | 153 | ||||
-rw-r--r-- | Documentation/trace/kmemtrace.txt | 126 | ||||
-rw-r--r-- | Documentation/trace/kprobetrace.txt | 2 | ||||
-rw-r--r-- | Documentation/trace/postprocess/trace-vmscan-postprocess.pl | 686 |
4 files changed, 835 insertions, 132 deletions
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt index f1f81afee8a0..dc52bd442c92 100644 --- a/Documentation/trace/ftrace-design.txt +++ b/Documentation/trace/ftrace-design.txt | |||
@@ -13,6 +13,9 @@ Note that this focuses on architecture implementation details only. If you | |||
13 | want more explanation of a feature in terms of common code, review the common | 13 | want more explanation of a feature in terms of common code, review the common |
14 | ftrace.txt file. | 14 | ftrace.txt file. |
15 | 15 | ||
16 | Ideally, everyone who wishes to retain performance while supporting tracing in | ||
17 | their kernel should make it all the way to dynamic ftrace support. | ||
18 | |||
16 | 19 | ||
17 | Prerequisites | 20 | Prerequisites |
18 | ------------- | 21 | ------------- |
@@ -215,7 +218,7 @@ An arch may pass in a unique value (frame pointer) to both the entering and | |||
215 | exiting of a function. On exit, the value is compared and if it does not | 218 | exiting of a function. On exit, the value is compared and if it does not |
216 | match, then it will panic the kernel. This is largely a sanity check for bad | 219 | match, then it will panic the kernel. This is largely a sanity check for bad |
217 | code generation with gcc. If gcc for your port sanely updates the frame | 220 | code generation with gcc. If gcc for your port sanely updates the frame |
218 | pointer under different opitmization levels, then ignore this option. | 221 | pointer under different optimization levels, then ignore this option. |
219 | 222 | ||
220 | However, adding support for it isn't terribly difficult. In your assembly code | 223 | However, adding support for it isn't terribly difficult. In your assembly code |
221 | that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument. | 224 | that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument. |
@@ -234,7 +237,7 @@ If you can't trace NMI functions, then skip this option. | |||
234 | 237 | ||
235 | 238 | ||
236 | HAVE_SYSCALL_TRACEPOINTS | 239 | HAVE_SYSCALL_TRACEPOINTS |
237 | --------------------- | 240 | ------------------------ |
238 | 241 | ||
239 | You need very few things to get the syscalls tracing in an arch. | 242 | You need very few things to get the syscalls tracing in an arch. |
240 | 243 | ||
@@ -250,12 +253,152 @@ You need very few things to get the syscalls tracing in an arch. | |||
250 | HAVE_FTRACE_MCOUNT_RECORD | 253 | HAVE_FTRACE_MCOUNT_RECORD |
251 | ------------------------- | 254 | ------------------------- |
252 | 255 | ||
253 | See scripts/recordmcount.pl for more info. | 256 | See scripts/recordmcount.pl for more info. Just fill in the arch-specific |
257 | details for how to locate the addresses of mcount call sites via objdump. | ||
258 | This option doesn't make much sense without also implementing dynamic ftrace. | ||
254 | 259 | ||
260 | |||
261 | HAVE_DYNAMIC_FTRACE | ||
262 | ------------------- | ||
263 | |||
264 | You will first need HAVE_FTRACE_MCOUNT_RECORD and HAVE_FUNCTION_TRACER, so | ||
265 | scroll your reader back up if you got over eager. | ||
266 | |||
267 | Once those are out of the way, you will need to implement: | ||
268 | - asm/ftrace.h: | ||
269 | - MCOUNT_ADDR | ||
270 | - ftrace_call_adjust() | ||
271 | - struct dyn_arch_ftrace{} | ||
272 | - asm code: | ||
273 | - mcount() (new stub) | ||
274 | - ftrace_caller() | ||
275 | - ftrace_call() | ||
276 | - ftrace_stub() | ||
277 | - C code: | ||
278 | - ftrace_dyn_arch_init() | ||
279 | - ftrace_make_nop() | ||
280 | - ftrace_make_call() | ||
281 | - ftrace_update_ftrace_func() | ||
282 | |||
283 | First you will need to fill out some arch details in your asm/ftrace.h. | ||
284 | |||
285 | Define MCOUNT_ADDR as the address of your mcount symbol similar to: | ||
286 | #define MCOUNT_ADDR ((unsigned long)mcount) | ||
287 | Since no one else will have a decl for that function, you will need to: | ||
288 | extern void mcount(void); | ||
289 | |||
290 | You will also need the helper function ftrace_call_adjust(). Most people | ||
291 | will be able to stub it out like so: | ||
292 | static inline unsigned long ftrace_call_adjust(unsigned long addr) | ||
293 | { | ||
294 | return addr; | ||
295 | } | ||
255 | <details to be filled> | 296 | <details to be filled> |
256 | 297 | ||
298 | Lastly you will need the custom dyn_arch_ftrace structure. If you need | ||
299 | some extra state when runtime patching arbitrary call sites, this is the | ||
300 | place. For now though, create an empty struct: | ||
301 | struct dyn_arch_ftrace { | ||
302 | /* No extra data needed */ | ||
303 | }; | ||
304 | |||
305 | With the header out of the way, we can fill out the assembly code. While we | ||
306 | did already create a mcount() function earlier, dynamic ftrace only wants a | ||
307 | stub function. This is because the mcount() will only be used during boot | ||
308 | and then all references to it will be patched out never to return. Instead, | ||
309 | the guts of the old mcount() will be used to create a new ftrace_caller() | ||
310 | function. Because the two are hard to merge, it will most likely be a lot | ||
311 | easier to have two separate definitions split up by #ifdefs. Same goes for | ||
312 | the ftrace_stub() as that will now be inlined in ftrace_caller(). | ||
313 | |||
314 | Before we get confused anymore, let's check out some pseudo code so you can | ||
315 | implement your own stuff in assembly: | ||
257 | 316 | ||
258 | HAVE_DYNAMIC_FTRACE | 317 | void mcount(void) |
259 | --------------------- | 318 | { |
319 | return; | ||
320 | } | ||
321 | |||
322 | void ftrace_caller(void) | ||
323 | { | ||
324 | /* implement HAVE_FUNCTION_TRACE_MCOUNT_TEST if you desire */ | ||
325 | |||
326 | /* save all state needed by the ABI (see paragraph above) */ | ||
327 | |||
328 | unsigned long frompc = ...; | ||
329 | unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE; | ||
330 | |||
331 | ftrace_call: | ||
332 | ftrace_stub(frompc, selfpc); | ||
333 | |||
334 | /* restore all state needed by the ABI */ | ||
335 | |||
336 | ftrace_stub: | ||
337 | return; | ||
338 | } | ||
339 | |||
340 | This might look a little odd at first, but keep in mind that we will be runtime | ||
341 | patching multiple things. First, only functions that we actually want to trace | ||
342 | will be patched to call ftrace_caller(). Second, since we only have one tracer | ||
343 | active at a time, we will patch the ftrace_caller() function itself to call the | ||
344 | specific tracer in question. That is the point of the ftrace_call label. | ||
345 | |||
346 | With that in mind, let's move on to the C code that will actually be doing the | ||
347 | runtime patching. You'll need a little knowledge of your arch's opcodes in | ||
348 | order to make it through the next section. | ||
349 | |||
350 | Every arch has an init callback function. If you need to do something early on | ||
351 | to initialize some state, this is the time to do that. Otherwise, this simple | ||
352 | function below should be sufficient for most people: | ||
353 | |||
354 | int __init ftrace_dyn_arch_init(void *data) | ||
355 | { | ||
356 | /* return value is done indirectly via data */ | ||
357 | *(unsigned long *)data = 0; | ||
358 | |||
359 | return 0; | ||
360 | } | ||
361 | |||
362 | There are two functions that are used to do runtime patching of arbitrary | ||
363 | functions. The first is used to turn the mcount call site into a nop (which | ||
364 | is what helps us retain runtime performance when not tracing). The second is | ||
365 | used to turn the mcount call site into a call to an arbitrary location (but | ||
366 | typically that is ftracer_caller()). See the general function definition in | ||
367 | linux/ftrace.h for the functions: | ||
368 | ftrace_make_nop() | ||
369 | ftrace_make_call() | ||
370 | The rec->ip value is the address of the mcount call site that was collected | ||
371 | by the scripts/recordmcount.pl during build time. | ||
372 | |||
373 | The last function is used to do runtime patching of the active tracer. This | ||
374 | will be modifying the assembly code at the location of the ftrace_call symbol | ||
375 | inside of the ftrace_caller() function. So you should have sufficient padding | ||
376 | at that location to support the new function calls you'll be inserting. Some | ||
377 | people will be using a "call" type instruction while others will be using a | ||
378 | "branch" type instruction. Specifically, the function is: | ||
379 | ftrace_update_ftrace_func() | ||
380 | |||
381 | |||
382 | HAVE_DYNAMIC_FTRACE + HAVE_FUNCTION_GRAPH_TRACER | ||
383 | ------------------------------------------------ | ||
384 | |||
385 | The function grapher needs a few tweaks in order to work with dynamic ftrace. | ||
386 | Basically, you will need to: | ||
387 | - update: | ||
388 | - ftrace_caller() | ||
389 | - ftrace_graph_call() | ||
390 | - ftrace_graph_caller() | ||
391 | - implement: | ||
392 | - ftrace_enable_ftrace_graph_caller() | ||
393 | - ftrace_disable_ftrace_graph_caller() | ||
260 | 394 | ||
261 | <details to be filled> | 395 | <details to be filled> |
396 | Quick notes: | ||
397 | - add a nop stub after the ftrace_call location named ftrace_graph_call; | ||
398 | stub needs to be large enough to support a call to ftrace_graph_caller() | ||
399 | - update ftrace_graph_caller() to work with being called by the new | ||
400 | ftrace_caller() since some semantics may have changed | ||
401 | - ftrace_enable_ftrace_graph_caller() will runtime patch the | ||
402 | ftrace_graph_call location with a call to ftrace_graph_caller() | ||
403 | - ftrace_disable_ftrace_graph_caller() will runtime patch the | ||
404 | ftrace_graph_call location with nops | ||
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt deleted file mode 100644 index 6308735e58ca..000000000000 --- a/Documentation/trace/kmemtrace.txt +++ /dev/null | |||
@@ -1,126 +0,0 @@ | |||
1 | kmemtrace - Kernel Memory Tracer | ||
2 | |||
3 | by Eduard - Gabriel Munteanu | ||
4 | <eduard.munteanu@linux360.ro> | ||
5 | |||
6 | I. Introduction | ||
7 | =============== | ||
8 | |||
9 | kmemtrace helps kernel developers figure out two things: | ||
10 | 1) how different allocators (SLAB, SLUB etc.) perform | ||
11 | 2) how kernel code allocates memory and how much | ||
12 | |||
13 | To do this, we trace every allocation and export information to the userspace | ||
14 | through the relay interface. We export things such as the number of requested | ||
15 | bytes, the number of bytes actually allocated (i.e. including internal | ||
16 | fragmentation), whether this is a slab allocation or a plain kmalloc() and so | ||
17 | on. | ||
18 | |||
19 | The actual analysis is performed by a userspace tool (see section III for | ||
20 | details on where to get it from). It logs the data exported by the kernel, | ||
21 | processes it and (as of writing this) can provide the following information: | ||
22 | - the total amount of memory allocated and fragmentation per call-site | ||
23 | - the amount of memory allocated and fragmentation per allocation | ||
24 | - total memory allocated and fragmentation in the collected dataset | ||
25 | - number of cross-CPU allocation and frees (makes sense in NUMA environments) | ||
26 | |||
27 | Moreover, it can potentially find inconsistent and erroneous behavior in | ||
28 | kernel code, such as using slab free functions on kmalloc'ed memory or | ||
29 | allocating less memory than requested (but not truly failed allocations). | ||
30 | |||
31 | kmemtrace also makes provisions for tracing on some arch and analysing the | ||
32 | data on another. | ||
33 | |||
34 | II. Design and goals | ||
35 | ==================== | ||
36 | |||
37 | kmemtrace was designed to handle rather large amounts of data. Thus, it uses | ||
38 | the relay interface to export whatever is logged to userspace, which then | ||
39 | stores it. Analysis and reporting is done asynchronously, that is, after the | ||
40 | data is collected and stored. By design, it allows one to log and analyse | ||
41 | on different machines and different arches. | ||
42 | |||
43 | As of writing this, the ABI is not considered stable, though it might not | ||
44 | change much. However, no guarantees are made about compatibility yet. When | ||
45 | deemed stable, the ABI should still allow easy extension while maintaining | ||
46 | backward compatibility. This is described further in Documentation/ABI. | ||
47 | |||
48 | Summary of design goals: | ||
49 | - allow logging and analysis to be done across different machines | ||
50 | - be fast and anticipate usage in high-load environments (*) | ||
51 | - be reasonably extensible | ||
52 | - make it possible for GNU/Linux distributions to have kmemtrace | ||
53 | included in their repositories | ||
54 | |||
55 | (*) - one of the reasons Pekka Enberg's original userspace data analysis | ||
56 | tool's code was rewritten from Perl to C (although this is more than a | ||
57 | simple conversion) | ||
58 | |||
59 | |||
60 | III. Quick usage guide | ||
61 | ====================== | ||
62 | |||
63 | 1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable | ||
64 | CONFIG_KMEMTRACE). | ||
65 | |||
66 | 2) Get the userspace tool and build it: | ||
67 | $ git clone git://repo.or.cz/kmemtrace-user.git # current repository | ||
68 | $ cd kmemtrace-user/ | ||
69 | $ ./autogen.sh | ||
70 | $ ./configure | ||
71 | $ make | ||
72 | |||
73 | 3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the | ||
74 | 'single' runlevel (so that relay buffers don't fill up easily), and run | ||
75 | kmemtrace: | ||
76 | # '$' does not mean user, but root here. | ||
77 | $ mount -t debugfs none /sys/kernel/debug | ||
78 | $ mount -t proc none /proc | ||
79 | $ cd path/to/kmemtrace-user/ | ||
80 | $ ./kmemtraced | ||
81 | Wait a bit, then stop it with CTRL+C. | ||
82 | $ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't | ||
83 | # overrun, should | ||
84 | # be zero. | ||
85 | $ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to | ||
86 | check its correctness] | ||
87 | $ ./kmemtrace-report | ||
88 | |||
89 | Now you should have a nice and short summary of how the allocator performs. | ||
90 | |||
91 | IV. FAQ and known issues | ||
92 | ======================== | ||
93 | |||
94 | Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix | ||
95 | this? Should I worry? | ||
96 | A: If it's non-zero, this affects kmemtrace's accuracy, depending on how | ||
97 | large the number is. You can fix it by supplying a higher | ||
98 | 'kmemtrace.subbufs=N' kernel parameter. | ||
99 | --- | ||
100 | |||
101 | Q: kmemtrace_check reports errors, how do I fix this? Should I worry? | ||
102 | A: This is a bug and should be reported. It can occur for a variety of | ||
103 | reasons: | ||
104 | - possible bugs in relay code | ||
105 | - possible misuse of relay by kmemtrace | ||
106 | - timestamps being collected unorderly | ||
107 | Or you may fix it yourself and send us a patch. | ||
108 | --- | ||
109 | |||
110 | Q: kmemtrace_report shows many errors, how do I fix this? Should I worry? | ||
111 | A: This is a known issue and I'm working on it. These might be true errors | ||
112 | in kernel code, which may have inconsistent behavior (e.g. allocating memory | ||
113 | with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed | ||
114 | out this behavior may work with SLAB, but may fail with other allocators. | ||
115 | |||
116 | It may also be due to lack of tracing in some unusual allocator functions. | ||
117 | |||
118 | We don't want bug reports regarding this issue yet. | ||
119 | --- | ||
120 | |||
121 | V. See also | ||
122 | =========== | ||
123 | |||
124 | Documentation/kernel-parameters.txt | ||
125 | Documentation/ABI/testing/debugfs-kmemtrace | ||
126 | |||
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index ec94748ae65b..5f77d94598dd 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt | |||
@@ -42,7 +42,7 @@ Synopsis of kprobe_events | |||
42 | +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) | 42 | +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**) |
43 | NAME=FETCHARG : Set NAME as the argument name of FETCHARG. | 43 | NAME=FETCHARG : Set NAME as the argument name of FETCHARG. |
44 | FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types | 44 | FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types |
45 | (u8/u16/u32/u64/s8/s16/s32/s64) are supported. | 45 | (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported. |
46 | 46 | ||
47 | (*) only for return probe. | 47 | (*) only for return probe. |
48 | (**) this is useful for fetching a field of data structures. | 48 | (**) this is useful for fetching a field of data structures. |
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl new file mode 100644 index 000000000000..1b55146d1c8d --- /dev/null +++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl | |||
@@ -0,0 +1,686 @@ | |||
1 | #!/usr/bin/perl | ||
2 | # This is a POC for reading the text representation of trace output related to | ||
3 | # page reclaim. It makes an attempt to extract some high-level information on | ||
4 | # what is going on. The accuracy of the parser may vary | ||
5 | # | ||
6 | # Example usage: trace-vmscan-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe | ||
7 | # other options | ||
8 | # --read-procstat If the trace lacks process info, get it from /proc | ||
9 | # --ignore-pid Aggregate processes of the same name together | ||
10 | # | ||
11 | # Copyright (c) IBM Corporation 2009 | ||
12 | # Author: Mel Gorman <mel@csn.ul.ie> | ||
13 | use strict; | ||
14 | use Getopt::Long; | ||
15 | |||
16 | # Tracepoint events | ||
17 | use constant MM_VMSCAN_DIRECT_RECLAIM_BEGIN => 1; | ||
18 | use constant MM_VMSCAN_DIRECT_RECLAIM_END => 2; | ||
19 | use constant MM_VMSCAN_KSWAPD_WAKE => 3; | ||
20 | use constant MM_VMSCAN_KSWAPD_SLEEP => 4; | ||
21 | use constant MM_VMSCAN_LRU_SHRINK_ACTIVE => 5; | ||
22 | use constant MM_VMSCAN_LRU_SHRINK_INACTIVE => 6; | ||
23 | use constant MM_VMSCAN_LRU_ISOLATE => 7; | ||
24 | use constant MM_VMSCAN_WRITEPAGE_FILE_SYNC => 8; | ||
25 | use constant MM_VMSCAN_WRITEPAGE_ANON_SYNC => 9; | ||
26 | use constant MM_VMSCAN_WRITEPAGE_FILE_ASYNC => 10; | ||
27 | use constant MM_VMSCAN_WRITEPAGE_ANON_ASYNC => 11; | ||
28 | use constant MM_VMSCAN_WRITEPAGE_ASYNC => 12; | ||
29 | use constant EVENT_UNKNOWN => 13; | ||
30 | |||
31 | # Per-order events | ||
32 | use constant MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER => 11; | ||
33 | use constant MM_VMSCAN_WAKEUP_KSWAPD_PERORDER => 12; | ||
34 | use constant MM_VMSCAN_KSWAPD_WAKE_PERORDER => 13; | ||
35 | use constant HIGH_KSWAPD_REWAKEUP_PERORDER => 14; | ||
36 | |||
37 | # Constants used to track state | ||
38 | use constant STATE_DIRECT_BEGIN => 15; | ||
39 | use constant STATE_DIRECT_ORDER => 16; | ||
40 | use constant STATE_KSWAPD_BEGIN => 17; | ||
41 | use constant STATE_KSWAPD_ORDER => 18; | ||
42 | |||
43 | # High-level events extrapolated from tracepoints | ||
44 | use constant HIGH_DIRECT_RECLAIM_LATENCY => 19; | ||
45 | use constant HIGH_KSWAPD_LATENCY => 20; | ||
46 | use constant HIGH_KSWAPD_REWAKEUP => 21; | ||
47 | use constant HIGH_NR_SCANNED => 22; | ||
48 | use constant HIGH_NR_TAKEN => 23; | ||
49 | use constant HIGH_NR_RECLAIM => 24; | ||
50 | use constant HIGH_NR_CONTIG_DIRTY => 25; | ||
51 | |||
52 | my %perprocesspid; | ||
53 | my %perprocess; | ||
54 | my %last_procmap; | ||
55 | my $opt_ignorepid; | ||
56 | my $opt_read_procstat; | ||
57 | |||
58 | my $total_wakeup_kswapd; | ||
59 | my ($total_direct_reclaim, $total_direct_nr_scanned); | ||
60 | my ($total_direct_latency, $total_kswapd_latency); | ||
61 | my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async); | ||
62 | my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async); | ||
63 | my ($total_kswapd_nr_scanned, $total_kswapd_wake); | ||
64 | my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async); | ||
65 | my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async); | ||
66 | |||
67 | # Catch sigint and exit on request | ||
68 | my $sigint_report = 0; | ||
69 | my $sigint_exit = 0; | ||
70 | my $sigint_pending = 0; | ||
71 | my $sigint_received = 0; | ||
72 | sub sigint_handler { | ||
73 | my $current_time = time; | ||
74 | if ($current_time - 2 > $sigint_received) { | ||
75 | print "SIGINT received, report pending. Hit ctrl-c again to exit\n"; | ||
76 | $sigint_report = 1; | ||
77 | } else { | ||
78 | if (!$sigint_exit) { | ||
79 | print "Second SIGINT received quickly, exiting\n"; | ||
80 | } | ||
81 | $sigint_exit++; | ||
82 | } | ||
83 | |||
84 | if ($sigint_exit > 3) { | ||
85 | print "Many SIGINTs received, exiting now without report\n"; | ||
86 | exit; | ||
87 | } | ||
88 | |||
89 | $sigint_received = $current_time; | ||
90 | $sigint_pending = 1; | ||
91 | } | ||
92 | $SIG{INT} = "sigint_handler"; | ||
93 | |||
94 | # Parse command line options | ||
95 | GetOptions( | ||
96 | 'ignore-pid' => \$opt_ignorepid, | ||
97 | 'read-procstat' => \$opt_read_procstat, | ||
98 | ); | ||
99 | |||
100 | # Defaults for dynamically discovered regex's | ||
101 | my $regex_direct_begin_default = 'order=([0-9]*) may_writepage=([0-9]*) gfp_flags=([A-Z_|]*)'; | ||
102 | my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)'; | ||
103 | my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)'; | ||
104 | my $regex_kswapd_sleep_default = 'nid=([0-9]*)'; | ||
105 | my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*)'; | ||
106 | my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_taken=([0-9]*) contig_taken=([0-9]*) contig_dirty=([0-9]*) contig_failed=([0-9]*)'; | ||
107 | my $regex_lru_shrink_inactive_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) priority=([0-9]*)'; | ||
108 | my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)'; | ||
109 | my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)'; | ||
110 | |||
111 | # Dyanically discovered regex | ||
112 | my $regex_direct_begin; | ||
113 | my $regex_direct_end; | ||
114 | my $regex_kswapd_wake; | ||
115 | my $regex_kswapd_sleep; | ||
116 | my $regex_wakeup_kswapd; | ||
117 | my $regex_lru_isolate; | ||
118 | my $regex_lru_shrink_inactive; | ||
119 | my $regex_lru_shrink_active; | ||
120 | my $regex_writepage; | ||
121 | |||
122 | # Static regex used. Specified like this for readability and for use with /o | ||
123 | # (process_pid) (cpus ) ( time ) (tpoint ) (details) | ||
124 | my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)'; | ||
125 | my $regex_statname = '[-0-9]*\s\((.*)\).*'; | ||
126 | my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*'; | ||
127 | |||
128 | sub generate_traceevent_regex { | ||
129 | my $event = shift; | ||
130 | my $default = shift; | ||
131 | my $regex; | ||
132 | |||
133 | # Read the event format or use the default | ||
134 | if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) { | ||
135 | print("WARNING: Event $event format string not found\n"); | ||
136 | return $default; | ||
137 | } else { | ||
138 | my $line; | ||
139 | while (!eof(FORMAT)) { | ||
140 | $line = <FORMAT>; | ||
141 | $line =~ s/, REC->.*//; | ||
142 | if ($line =~ /^print fmt:\s"(.*)".*/) { | ||
143 | $regex = $1; | ||
144 | $regex =~ s/%s/\([0-9a-zA-Z|_]*\)/g; | ||
145 | $regex =~ s/%p/\([0-9a-f]*\)/g; | ||
146 | $regex =~ s/%d/\([-0-9]*\)/g; | ||
147 | $regex =~ s/%ld/\([-0-9]*\)/g; | ||
148 | $regex =~ s/%lu/\([0-9]*\)/g; | ||
149 | } | ||
150 | } | ||
151 | } | ||
152 | |||
153 | # Can't handle the print_flags stuff but in the context of this | ||
154 | # script, it really doesn't matter | ||
155 | $regex =~ s/\(REC.*\) \? __print_flags.*//; | ||
156 | |||
157 | # Verify fields are in the right order | ||
158 | my $tuple; | ||
159 | foreach $tuple (split /\s/, $regex) { | ||
160 | my ($key, $value) = split(/=/, $tuple); | ||
161 | my $expected = shift; | ||
162 | if ($key ne $expected) { | ||
163 | print("WARNING: Format not as expected for event $event '$key' != '$expected'\n"); | ||
164 | $regex =~ s/$key=\((.*)\)/$key=$1/; | ||
165 | } | ||
166 | } | ||
167 | |||
168 | if (defined shift) { | ||
169 | die("Fewer fields than expected in format"); | ||
170 | } | ||
171 | |||
172 | return $regex; | ||
173 | } | ||
174 | |||
175 | $regex_direct_begin = generate_traceevent_regex( | ||
176 | "vmscan/mm_vmscan_direct_reclaim_begin", | ||
177 | $regex_direct_begin_default, | ||
178 | "order", "may_writepage", | ||
179 | "gfp_flags"); | ||
180 | $regex_direct_end = generate_traceevent_regex( | ||
181 | "vmscan/mm_vmscan_direct_reclaim_end", | ||
182 | $regex_direct_end_default, | ||
183 | "nr_reclaimed"); | ||
184 | $regex_kswapd_wake = generate_traceevent_regex( | ||
185 | "vmscan/mm_vmscan_kswapd_wake", | ||
186 | $regex_kswapd_wake_default, | ||
187 | "nid", "order"); | ||
188 | $regex_kswapd_sleep = generate_traceevent_regex( | ||
189 | "vmscan/mm_vmscan_kswapd_sleep", | ||
190 | $regex_kswapd_sleep_default, | ||
191 | "nid"); | ||
192 | $regex_wakeup_kswapd = generate_traceevent_regex( | ||
193 | "vmscan/mm_vmscan_wakeup_kswapd", | ||
194 | $regex_wakeup_kswapd_default, | ||
195 | "nid", "zid", "order"); | ||
196 | $regex_lru_isolate = generate_traceevent_regex( | ||
197 | "vmscan/mm_vmscan_lru_isolate", | ||
198 | $regex_lru_isolate_default, | ||
199 | "isolate_mode", "order", | ||
200 | "nr_requested", "nr_scanned", "nr_taken", | ||
201 | "contig_taken", "contig_dirty", "contig_failed"); | ||
202 | $regex_lru_shrink_inactive = generate_traceevent_regex( | ||
203 | "vmscan/mm_vmscan_lru_shrink_inactive", | ||
204 | $regex_lru_shrink_inactive_default, | ||
205 | "nid", "zid", | ||
206 | "lru", | ||
207 | "nr_scanned", "nr_reclaimed", "priority"); | ||
208 | $regex_lru_shrink_active = generate_traceevent_regex( | ||
209 | "vmscan/mm_vmscan_lru_shrink_active", | ||
210 | $regex_lru_shrink_active_default, | ||
211 | "nid", "zid", | ||
212 | "lru", | ||
213 | "nr_scanned", "nr_rotated", "priority"); | ||
214 | $regex_writepage = generate_traceevent_regex( | ||
215 | "vmscan/mm_vmscan_writepage", | ||
216 | $regex_writepage_default, | ||
217 | "page", "pfn", "flags"); | ||
218 | |||
219 | sub read_statline($) { | ||
220 | my $pid = $_[0]; | ||
221 | my $statline; | ||
222 | |||
223 | if (open(STAT, "/proc/$pid/stat")) { | ||
224 | $statline = <STAT>; | ||
225 | close(STAT); | ||
226 | } | ||
227 | |||
228 | if ($statline eq '') { | ||
229 | $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0"; | ||
230 | } | ||
231 | |||
232 | return $statline; | ||
233 | } | ||
234 | |||
235 | sub guess_process_pid($$) { | ||
236 | my $pid = $_[0]; | ||
237 | my $statline = $_[1]; | ||
238 | |||
239 | if ($pid == 0) { | ||
240 | return "swapper-0"; | ||
241 | } | ||
242 | |||
243 | if ($statline !~ /$regex_statname/o) { | ||
244 | die("Failed to math stat line for process name :: $statline"); | ||
245 | } | ||
246 | return "$1-$pid"; | ||
247 | } | ||
248 | |||
249 | # Convert sec.usec timestamp format | ||
250 | sub timestamp_to_ms($) { | ||
251 | my $timestamp = $_[0]; | ||
252 | |||
253 | my ($sec, $usec) = split (/\./, $timestamp); | ||
254 | return ($sec * 1000) + ($usec / 1000); | ||
255 | } | ||
256 | |||
257 | sub process_events { | ||
258 | my $traceevent; | ||
259 | my $process_pid; | ||
260 | my $cpus; | ||
261 | my $timestamp; | ||
262 | my $tracepoint; | ||
263 | my $details; | ||
264 | my $statline; | ||
265 | |||
266 | # Read each line of the event log | ||
267 | EVENT_PROCESS: | ||
268 | while ($traceevent = <STDIN>) { | ||
269 | if ($traceevent =~ /$regex_traceevent/o) { | ||
270 | $process_pid = $1; | ||
271 | $timestamp = $3; | ||
272 | $tracepoint = $4; | ||
273 | |||
274 | $process_pid =~ /(.*)-([0-9]*)$/; | ||
275 | my $process = $1; | ||
276 | my $pid = $2; | ||
277 | |||
278 | if ($process eq "") { | ||
279 | $process = $last_procmap{$pid}; | ||
280 | $process_pid = "$process-$pid"; | ||
281 | } | ||
282 | $last_procmap{$pid} = $process; | ||
283 | |||
284 | if ($opt_read_procstat) { | ||
285 | $statline = read_statline($pid); | ||
286 | if ($opt_read_procstat && $process eq '') { | ||
287 | $process_pid = guess_process_pid($pid, $statline); | ||
288 | } | ||
289 | } | ||
290 | } else { | ||
291 | next; | ||
292 | } | ||
293 | |||
294 | # Perl Switch() sucks majorly | ||
295 | if ($tracepoint eq "mm_vmscan_direct_reclaim_begin") { | ||
296 | $timestamp = timestamp_to_ms($timestamp); | ||
297 | $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}++; | ||
298 | $perprocesspid{$process_pid}->{STATE_DIRECT_BEGIN} = $timestamp; | ||
299 | |||
300 | $details = $5; | ||
301 | if ($details !~ /$regex_direct_begin/o) { | ||
302 | print "WARNING: Failed to parse mm_vmscan_direct_reclaim_begin as expected\n"; | ||
303 | print " $details\n"; | ||
304 | print " $regex_direct_begin\n"; | ||
305 | next; | ||
306 | } | ||
307 | my $order = $1; | ||
308 | $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER}[$order]++; | ||
309 | $perprocesspid{$process_pid}->{STATE_DIRECT_ORDER} = $order; | ||
310 | } elsif ($tracepoint eq "mm_vmscan_direct_reclaim_end") { | ||
311 | # Count the event itself | ||
312 | my $index = $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_END}; | ||
313 | $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_END}++; | ||
314 | |||
315 | # Record how long direct reclaim took this time | ||
316 | if (defined $perprocesspid{$process_pid}->{STATE_DIRECT_BEGIN}) { | ||
317 | $timestamp = timestamp_to_ms($timestamp); | ||
318 | my $order = $perprocesspid{$process_pid}->{STATE_DIRECT_ORDER}; | ||
319 | my $latency = ($timestamp - $perprocesspid{$process_pid}->{STATE_DIRECT_BEGIN}); | ||
320 | $perprocesspid{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index] = "$order-$latency"; | ||
321 | } | ||
322 | } elsif ($tracepoint eq "mm_vmscan_kswapd_wake") { | ||
323 | $details = $5; | ||
324 | if ($details !~ /$regex_kswapd_wake/o) { | ||
325 | print "WARNING: Failed to parse mm_vmscan_kswapd_wake as expected\n"; | ||
326 | print " $details\n"; | ||
327 | print " $regex_kswapd_wake\n"; | ||
328 | next; | ||
329 | } | ||
330 | |||
331 | my $order = $2; | ||
332 | $perprocesspid{$process_pid}->{STATE_KSWAPD_ORDER} = $order; | ||
333 | if (!$perprocesspid{$process_pid}->{STATE_KSWAPD_BEGIN}) { | ||
334 | $timestamp = timestamp_to_ms($timestamp); | ||
335 | $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}++; | ||
336 | $perprocesspid{$process_pid}->{STATE_KSWAPD_BEGIN} = $timestamp; | ||
337 | $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE_PERORDER}[$order]++; | ||
338 | } else { | ||
339 | $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP}++; | ||
340 | $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP_PERORDER}[$order]++; | ||
341 | } | ||
342 | } elsif ($tracepoint eq "mm_vmscan_kswapd_sleep") { | ||
343 | |||
344 | # Count the event itself | ||
345 | my $index = $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_SLEEP}; | ||
346 | $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_SLEEP}++; | ||
347 | |||
348 | # Record how long kswapd was awake | ||
349 | $timestamp = timestamp_to_ms($timestamp); | ||
350 | my $order = $perprocesspid{$process_pid}->{STATE_KSWAPD_ORDER}; | ||
351 | my $latency = ($timestamp - $perprocesspid{$process_pid}->{STATE_KSWAPD_BEGIN}); | ||
352 | $perprocesspid{$process_pid}->{HIGH_KSWAPD_LATENCY}[$index] = "$order-$latency"; | ||
353 | $perprocesspid{$process_pid}->{STATE_KSWAPD_BEGIN} = 0; | ||
354 | } elsif ($tracepoint eq "mm_vmscan_wakeup_kswapd") { | ||
355 | $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}++; | ||
356 | |||
357 | $details = $5; | ||
358 | if ($details !~ /$regex_wakeup_kswapd/o) { | ||
359 | print "WARNING: Failed to parse mm_vmscan_wakeup_kswapd as expected\n"; | ||
360 | print " $details\n"; | ||
361 | print " $regex_wakeup_kswapd\n"; | ||
362 | next; | ||
363 | } | ||
364 | my $order = $3; | ||
365 | $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order]++; | ||
366 | } elsif ($tracepoint eq "mm_vmscan_lru_isolate") { | ||
367 | $details = $5; | ||
368 | if ($details !~ /$regex_lru_isolate/o) { | ||
369 | print "WARNING: Failed to parse mm_vmscan_lru_isolate as expected\n"; | ||
370 | print " $details\n"; | ||
371 | print " $regex_lru_isolate/o\n"; | ||
372 | next; | ||
373 | } | ||
374 | my $nr_scanned = $4; | ||
375 | my $nr_contig_dirty = $7; | ||
376 | $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned; | ||
377 | $perprocesspid{$process_pid}->{HIGH_NR_CONTIG_DIRTY} += $nr_contig_dirty; | ||
378 | } elsif ($tracepoint eq "mm_vmscan_writepage") { | ||
379 | $details = $5; | ||
380 | if ($details !~ /$regex_writepage/o) { | ||
381 | print "WARNING: Failed to parse mm_vmscan_writepage as expected\n"; | ||
382 | print " $details\n"; | ||
383 | print " $regex_writepage\n"; | ||
384 | next; | ||
385 | } | ||
386 | |||
387 | my $flags = $3; | ||
388 | my $file = 0; | ||
389 | my $sync_io = 0; | ||
390 | if ($flags =~ /RECLAIM_WB_FILE/) { | ||
391 | $file = 1; | ||
392 | } | ||
393 | if ($flags =~ /RECLAIM_WB_SYNC/) { | ||
394 | $sync_io = 1; | ||
395 | } | ||
396 | if ($sync_io) { | ||
397 | if ($file) { | ||
398 | $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}++; | ||
399 | } else { | ||
400 | $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}++; | ||
401 | } | ||
402 | } else { | ||
403 | if ($file) { | ||
404 | $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}++; | ||
405 | } else { | ||
406 | $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}++; | ||
407 | } | ||
408 | } | ||
409 | } else { | ||
410 | $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++; | ||
411 | } | ||
412 | |||
413 | if ($sigint_pending) { | ||
414 | last EVENT_PROCESS; | ||
415 | } | ||
416 | } | ||
417 | } | ||
418 | |||
419 | sub dump_stats { | ||
420 | my $hashref = shift; | ||
421 | my %stats = %$hashref; | ||
422 | |||
423 | # Dump per-process stats | ||
424 | my $process_pid; | ||
425 | my $max_strlen = 0; | ||
426 | |||
427 | # Get the maximum process name | ||
428 | foreach $process_pid (keys %perprocesspid) { | ||
429 | my $len = length($process_pid); | ||
430 | if ($len > $max_strlen) { | ||
431 | $max_strlen = $len; | ||
432 | } | ||
433 | } | ||
434 | $max_strlen += 2; | ||
435 | |||
436 | # Work out latencies | ||
437 | printf("\n") if !$opt_ignorepid; | ||
438 | printf("Reclaim latencies expressed as order-latency_in_ms\n") if !$opt_ignorepid; | ||
439 | foreach $process_pid (keys %stats) { | ||
440 | |||
441 | if (!$stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[0] && | ||
442 | !$stats{$process_pid}->{HIGH_KSWAPD_LATENCY}[0]) { | ||
443 | next; | ||
444 | } | ||
445 | |||
446 | printf "%-" . $max_strlen . "s ", $process_pid if !$opt_ignorepid; | ||
447 | my $index = 0; | ||
448 | while (defined $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index] || | ||
449 | defined $stats{$process_pid}->{HIGH_KSWAPD_LATENCY}[$index]) { | ||
450 | |||
451 | if ($stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]) { | ||
452 | printf("%s ", $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]) if !$opt_ignorepid; | ||
453 | my ($dummy, $latency) = split(/-/, $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]); | ||
454 | $total_direct_latency += $latency; | ||
455 | } else { | ||
456 | printf("%s ", $stats{$process_pid}->{HIGH_KSWAPD_LATENCY}[$index]) if !$opt_ignorepid; | ||
457 | my ($dummy, $latency) = split(/-/, $stats{$process_pid}->{HIGH_KSWAPD_LATENCY}[$index]); | ||
458 | $total_kswapd_latency += $latency; | ||
459 | } | ||
460 | $index++; | ||
461 | } | ||
462 | print "\n" if !$opt_ignorepid; | ||
463 | } | ||
464 | |||
465 | # Print out process activity | ||
466 | printf("\n"); | ||
467 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s\n", "Process", "Direct", "Wokeup", "Pages", "Pages", "Pages", "Time"); | ||
468 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s\n", "details", "Rclms", "Kswapd", "Scanned", "Sync-IO", "ASync-IO", "Stalled"); | ||
469 | foreach $process_pid (keys %stats) { | ||
470 | |||
471 | if (!$stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}) { | ||
472 | next; | ||
473 | } | ||
474 | |||
475 | $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}; | ||
476 | $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}; | ||
477 | $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED}; | ||
478 | $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; | ||
479 | $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; | ||
480 | $total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; | ||
481 | |||
482 | $total_direct_writepage_anon_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}; | ||
483 | |||
484 | my $index = 0; | ||
485 | my $this_reclaim_delay = 0; | ||
486 | while (defined $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]) { | ||
487 | my ($dummy, $latency) = split(/-/, $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]); | ||
488 | $this_reclaim_delay += $latency; | ||
489 | $index++; | ||
490 | } | ||
491 | |||
492 | printf("%-" . $max_strlen . "s %8d %10d %8u %8u %8u %8.3f", | ||
493 | $process_pid, | ||
494 | $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}, | ||
495 | $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}, | ||
496 | $stats{$process_pid}->{HIGH_NR_SCANNED}, | ||
497 | $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}, | ||
498 | $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}, | ||
499 | $this_reclaim_delay / 1000); | ||
500 | |||
501 | if ($stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}) { | ||
502 | print " "; | ||
503 | for (my $order = 0; $order < 20; $order++) { | ||
504 | my $count = $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER}[$order]; | ||
505 | if ($count != 0) { | ||
506 | print "direct-$order=$count "; | ||
507 | } | ||
508 | } | ||
509 | } | ||
510 | if ($stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}) { | ||
511 | print " "; | ||
512 | for (my $order = 0; $order < 20; $order++) { | ||
513 | my $count = $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order]; | ||
514 | if ($count != 0) { | ||
515 | print "wakeup-$order=$count "; | ||
516 | } | ||
517 | } | ||
518 | } | ||
519 | if ($stats{$process_pid}->{HIGH_NR_CONTIG_DIRTY}) { | ||
520 | print " "; | ||
521 | my $count = $stats{$process_pid}->{HIGH_NR_CONTIG_DIRTY}; | ||
522 | if ($count != 0) { | ||
523 | print "contig-dirty=$count "; | ||
524 | } | ||
525 | } | ||
526 | |||
527 | print "\n"; | ||
528 | } | ||
529 | |||
530 | # Print out kswapd activity | ||
531 | printf("\n"); | ||
532 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s\n", "Kswapd", "Kswapd", "Order", "Pages", "Pages", "Pages"); | ||
533 | printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s\n", "Instance", "Wakeups", "Re-wakeup", "Scanned", "Sync-IO", "ASync-IO"); | ||
534 | foreach $process_pid (keys %stats) { | ||
535 | |||
536 | if (!$stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}) { | ||
537 | next; | ||
538 | } | ||
539 | |||
540 | $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}; | ||
541 | $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED}; | ||
542 | $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; | ||
543 | $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; | ||
544 | $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; | ||
545 | $total_kswapd_writepage_anon_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}; | ||
546 | |||
547 | printf("%-" . $max_strlen . "s %8d %10d %8u %8i %8u", | ||
548 | $process_pid, | ||
549 | $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}, | ||
550 | $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP}, | ||
551 | $stats{$process_pid}->{HIGH_NR_SCANNED}, | ||
552 | $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}, | ||
553 | $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}); | ||
554 | |||
555 | if ($stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}) { | ||
556 | print " "; | ||
557 | for (my $order = 0; $order < 20; $order++) { | ||
558 | my $count = $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE_PERORDER}[$order]; | ||
559 | if ($count != 0) { | ||
560 | print "wake-$order=$count "; | ||
561 | } | ||
562 | } | ||
563 | } | ||
564 | if ($stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP}) { | ||
565 | print " "; | ||
566 | for (my $order = 0; $order < 20; $order++) { | ||
567 | my $count = $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP_PERORDER}[$order]; | ||
568 | if ($count != 0) { | ||
569 | print "rewake-$order=$count "; | ||
570 | } | ||
571 | } | ||
572 | } | ||
573 | printf("\n"); | ||
574 | } | ||
575 | |||
576 | # Print out summaries | ||
577 | $total_direct_latency /= 1000; | ||
578 | $total_kswapd_latency /= 1000; | ||
579 | print "\nSummary\n"; | ||
580 | print "Direct reclaims: $total_direct_reclaim\n"; | ||
581 | print "Direct reclaim pages scanned: $total_direct_nr_scanned\n"; | ||
582 | print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n"; | ||
583 | print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n"; | ||
584 | print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n"; | ||
585 | print "Direct reclaim write anon async I/O: $total_direct_writepage_anon_async\n"; | ||
586 | print "Wake kswapd requests: $total_wakeup_kswapd\n"; | ||
587 | printf "Time stalled direct reclaim: %-1.2f seconds\n", $total_direct_latency; | ||
588 | print "\n"; | ||
589 | print "Kswapd wakeups: $total_kswapd_wake\n"; | ||
590 | print "Kswapd pages scanned: $total_kswapd_nr_scanned\n"; | ||
591 | print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n"; | ||
592 | print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n"; | ||
593 | print "Kswapd reclaim write file async I/O: $total_kswapd_writepage_file_async\n"; | ||
594 | print "Kswapd reclaim write anon async I/O: $total_kswapd_writepage_anon_async\n"; | ||
595 | printf "Time kswapd awake: %-1.2f seconds\n", $total_kswapd_latency; | ||
596 | } | ||
597 | |||
598 | sub aggregate_perprocesspid() { | ||
599 | my $process_pid; | ||
600 | my $process; | ||
601 | undef %perprocess; | ||
602 | |||
603 | foreach $process_pid (keys %perprocesspid) { | ||
604 | $process = $process_pid; | ||
605 | $process =~ s/-([0-9])*$//; | ||
606 | if ($process eq '') { | ||
607 | $process = "NO_PROCESS_NAME"; | ||
608 | } | ||
609 | |||
610 | $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN} += $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}; | ||
611 | $perprocess{$process}->{MM_VMSCAN_KSWAPD_WAKE} += $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}; | ||
612 | $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}; | ||
613 | $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP}; | ||
614 | $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED}; | ||
615 | $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}; | ||
616 | $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}; | ||
617 | $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}; | ||
618 | $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}; | ||
619 | |||
620 | for (my $order = 0; $order < 20; $order++) { | ||
621 | $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER}[$order] += $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER}[$order]; | ||
622 | $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order] += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order]; | ||
623 | $perprocess{$process}->{MM_VMSCAN_KSWAPD_WAKE_PERORDER}[$order] += $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE_PERORDER}[$order]; | ||
624 | |||
625 | } | ||
626 | |||
627 | # Aggregate direct reclaim latencies | ||
628 | my $wr_index = $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_END}; | ||
629 | my $rd_index = 0; | ||
630 | while (defined $perprocesspid{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$rd_index]) { | ||
631 | $perprocess{$process}->{HIGH_DIRECT_RECLAIM_LATENCY}[$wr_index] = $perprocesspid{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$rd_index]; | ||
632 | $rd_index++; | ||
633 | $wr_index++; | ||
634 | } | ||
635 | $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_END} = $wr_index; | ||
636 | |||
637 | # Aggregate kswapd latencies | ||
638 | my $wr_index = $perprocess{$process}->{MM_VMSCAN_KSWAPD_SLEEP}; | ||
639 | my $rd_index = 0; | ||
640 | while (defined $perprocesspid{$process_pid}->{HIGH_KSWAPD_LATENCY}[$rd_index]) { | ||
641 | $perprocess{$process}->{HIGH_KSWAPD_LATENCY}[$wr_index] = $perprocesspid{$process_pid}->{HIGH_KSWAPD_LATENCY}[$rd_index]; | ||
642 | $rd_index++; | ||
643 | $wr_index++; | ||
644 | } | ||
645 | $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_END} = $wr_index; | ||
646 | } | ||
647 | } | ||
648 | |||
649 | sub report() { | ||
650 | if (!$opt_ignorepid) { | ||
651 | dump_stats(\%perprocesspid); | ||
652 | } else { | ||
653 | aggregate_perprocesspid(); | ||
654 | dump_stats(\%perprocess); | ||
655 | } | ||
656 | } | ||
657 | |||
658 | # Process events or signals until neither is available | ||
659 | sub signal_loop() { | ||
660 | my $sigint_processed; | ||
661 | do { | ||
662 | $sigint_processed = 0; | ||
663 | process_events(); | ||
664 | |||
665 | # Handle pending signals if any | ||
666 | if ($sigint_pending) { | ||
667 | my $current_time = time; | ||
668 | |||
669 | if ($sigint_exit) { | ||
670 | print "Received exit signal\n"; | ||
671 | $sigint_pending = 0; | ||
672 | } | ||
673 | if ($sigint_report) { | ||
674 | if ($current_time >= $sigint_received + 2) { | ||
675 | report(); | ||
676 | $sigint_report = 0; | ||
677 | $sigint_pending = 0; | ||
678 | $sigint_processed = 1; | ||
679 | } | ||
680 | } | ||
681 | } | ||
682 | } while ($sigint_pending || $sigint_processed); | ||
683 | } | ||
684 | |||
685 | signal_loop(); | ||
686 | report(); | ||