diff options
author | Christoph Lameter <clameter@sgi.com> | 2008-02-07 20:47:41 -0500 |
---|---|---|
committer | Christoph Lameter <christoph@stapp.engr.sgi.com> | 2008-02-07 20:47:41 -0500 |
commit | 8ff12cfc009a2a38d87fa7058226fe197bb2696f (patch) | |
tree | 1358ed247d3c897d8790342a978dd5078354a207 /Documentation | |
parent | 1f84260c8ce3b1ce26d4c1d6dedc2f33a3a29c0c (diff) |
SLUB: Support for performance statistics
The statistics provided here allow the monitoring of allocator behavior but
at the cost of some (minimal) loss of performance. Counters are placed in
SLUB's per cpu data structure. The per cpu structure may be extended by the
statistics to grow larger than one cacheline which will increase the cache
footprint of SLUB.
There is a compile option to enable/disable the inclusion of the runtime
statistics and its off by default.
The slabinfo tool is enhanced to support these statistics via two options:
-D Switches the line of information displayed for a slab from size
mode to activity mode.
-A Sorts the slabs displayed by activity. This allows the display of
the slabs most important to the performance of a certain load.
-r Report option will report detailed statistics on
Example (tbench load):
slabinfo -AD ->Shows the most active slabs
Name Objects Alloc Free %Fast
skbuff_fclone_cache 33 111953835 111953835 99 99
:0000192 2666 5283688 5281047 99 99
:0001024 849 5247230 5246389 83 83
vm_area_struct 1349 119642 118355 91 22
:0004096 15 66753 66751 98 98
:0000064 2067 25297 23383 98 78
dentry 10259 28635 18464 91 45
:0000080 11004 18950 8089 98 98
:0000096 1703 12358 10784 99 98
:0000128 762 10582 9875 94 18
:0000512 184 9807 9647 95 81
:0002048 479 9669 9195 83 65
anon_vma 777 9461 9002 99 71
kmalloc-8 6492 9981 5624 99 97
:0000768 258 7174 6931 58 15
So the skbuff_fclone_cache is of highest importance for the tbench load.
Pretty high load on the 192 sized slab. Look for the aliases
slabinfo -a | grep 000192
:0000192 <- xfs_btree_cur filp kmalloc-192 uid_cache tw_sock_TCP
request_sock_TCPv6 tw_sock_TCPv6 skbuff_head_cache xfs_ili
Likely skbuff_head_cache.
Looking into the statistics of the skbuff_fclone_cache is possible through
slabinfo skbuff_fclone_cache ->-r option implied if cache name is mentioned
.... Usual output ...
Slab Perf Counter Alloc Free %Al %Fr
--------------------------------------------------
Fastpath 111953360 111946981 99 99
Slowpath 1044 7423 0 0
Page Alloc 272 264 0 0
Add partial 25 325 0 0
Remove partial 86 264 0 0
RemoteObj/SlabFrozen 350 4832 0 0
Total 111954404 111954404
Flushes 49 Refill 0
Deactivate Full=325(92%) Empty=0(0%) ToHead=24(6%) ToTail=1(0%)
Looks good because the fastpath is overwhelmingly taken.
skbuff_head_cache:
Slab Perf Counter Alloc Free %Al %Fr
--------------------------------------------------
Fastpath 5297262 5259882 99 99
Slowpath 4477 39586 0 0
Page Alloc 937 824 0 0
Add partial 0 2515 0 0
Remove partial 1691 824 0 0
RemoteObj/SlabFrozen 2621 9684 0 0
Total 5301739 5299468
Deactivate Full=2620(100%) Empty=0(0%) ToHead=0(0%) ToTail=0(0%)
Descriptions of the output:
Total: The total number of allocation and frees that occurred for a
slab
Fastpath: The number of allocations/frees that used the fastpath.
Slowpath: Other allocations
Page Alloc: Number of calls to the page allocator as a result of slowpath
processing
Add Partial: Number of slabs added to the partial list through free or
alloc (occurs during cpuslab flushes)
Remove Partial: Number of slabs removed from the partial list as a result of
allocations retrieving a partial slab or by a free freeing
the last object of a slab.
RemoteObj/Froz: How many times were remotely freed object encountered when a
slab was about to be deactivated. Frozen: How many times was
free able to skip list processing because the slab was in use
as the cpuslab of another processor.
Flushes: Number of times the cpuslab was flushed on request
(kmem_cache_shrink, may result from races in __slab_alloc)
Refill: Number of times we were able to refill the cpuslab from
remotely freed objects for the same slab.
Deactivate: Statistics how slabs were deactivated. Shows how they were
put onto the partial list.
In general fastpath is very good. Slowpath without partial list processing is
also desirable. Any touching of partial list uses node specific locks which
may potentially cause list lock contention.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/vm/slabinfo.c | 149 |
1 files changed, 138 insertions, 11 deletions
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index 488c1f31b99..7123fee708c 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c | |||
@@ -32,6 +32,13 @@ struct slabinfo { | |||
32 | int sanity_checks, slab_size, store_user, trace; | 32 | int sanity_checks, slab_size, store_user, trace; |
33 | int order, poison, reclaim_account, red_zone; | 33 | int order, poison, reclaim_account, red_zone; |
34 | unsigned long partial, objects, slabs; | 34 | unsigned long partial, objects, slabs; |
35 | unsigned long alloc_fastpath, alloc_slowpath; | ||
36 | unsigned long free_fastpath, free_slowpath; | ||
37 | unsigned long free_frozen, free_add_partial, free_remove_partial; | ||
38 | unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; | ||
39 | unsigned long cpuslab_flush, deactivate_full, deactivate_empty; | ||
40 | unsigned long deactivate_to_head, deactivate_to_tail; | ||
41 | unsigned long deactivate_remote_frees; | ||
35 | int numa[MAX_NODES]; | 42 | int numa[MAX_NODES]; |
36 | int numa_partial[MAX_NODES]; | 43 | int numa_partial[MAX_NODES]; |
37 | } slabinfo[MAX_SLABS]; | 44 | } slabinfo[MAX_SLABS]; |
@@ -64,8 +71,10 @@ int show_inverted = 0; | |||
64 | int show_single_ref = 0; | 71 | int show_single_ref = 0; |
65 | int show_totals = 0; | 72 | int show_totals = 0; |
66 | int sort_size = 0; | 73 | int sort_size = 0; |
74 | int sort_active = 0; | ||
67 | int set_debug = 0; | 75 | int set_debug = 0; |
68 | int show_ops = 0; | 76 | int show_ops = 0; |
77 | int show_activity = 0; | ||
69 | 78 | ||
70 | /* Debug options */ | 79 | /* Debug options */ |
71 | int sanity = 0; | 80 | int sanity = 0; |
@@ -93,8 +102,10 @@ void usage(void) | |||
93 | printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" | 102 | printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" |
94 | "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" | 103 | "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" |
95 | "-a|--aliases Show aliases\n" | 104 | "-a|--aliases Show aliases\n" |
105 | "-A|--activity Most active slabs first\n" | ||
96 | "-d<options>|--debug=<options> Set/Clear Debug options\n" | 106 | "-d<options>|--debug=<options> Set/Clear Debug options\n" |
97 | "-e|--empty Show empty slabs\n" | 107 | "-D|--display-active Switch line format to activity\n" |
108 | "-e|--empty Show empty slabs\n" | ||
98 | "-f|--first-alias Show first alias\n" | 109 | "-f|--first-alias Show first alias\n" |
99 | "-h|--help Show usage information\n" | 110 | "-h|--help Show usage information\n" |
100 | "-i|--inverted Inverted list\n" | 111 | "-i|--inverted Inverted list\n" |
@@ -281,8 +292,11 @@ int line = 0; | |||
281 | 292 | ||
282 | void first_line(void) | 293 | void first_line(void) |
283 | { | 294 | { |
284 | printf("Name Objects Objsize Space " | 295 | if (show_activity) |
285 | "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); | 296 | printf("Name Objects Alloc Free %%Fast\n"); |
297 | else | ||
298 | printf("Name Objects Objsize Space " | ||
299 | "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); | ||
286 | } | 300 | } |
287 | 301 | ||
288 | /* | 302 | /* |
@@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s) | |||
309 | return s->slabs * (page_size << s->order); | 323 | return s->slabs * (page_size << s->order); |
310 | } | 324 | } |
311 | 325 | ||
326 | unsigned long slab_activity(struct slabinfo *s) | ||
327 | { | ||
328 | return s->alloc_fastpath + s->free_fastpath + | ||
329 | s->alloc_slowpath + s->free_slowpath; | ||
330 | } | ||
331 | |||
312 | void slab_numa(struct slabinfo *s, int mode) | 332 | void slab_numa(struct slabinfo *s, int mode) |
313 | { | 333 | { |
314 | int node; | 334 | int node; |
@@ -392,6 +412,71 @@ const char *onoff(int x) | |||
392 | return "Off"; | 412 | return "Off"; |
393 | } | 413 | } |
394 | 414 | ||
415 | void slab_stats(struct slabinfo *s) | ||
416 | { | ||
417 | unsigned long total_alloc; | ||
418 | unsigned long total_free; | ||
419 | unsigned long total; | ||
420 | |||
421 | if (!s->alloc_slab) | ||
422 | return; | ||
423 | |||
424 | total_alloc = s->alloc_fastpath + s->alloc_slowpath; | ||
425 | total_free = s->free_fastpath + s->free_slowpath; | ||
426 | |||
427 | if (!total_alloc) | ||
428 | return; | ||
429 | |||
430 | printf("\n"); | ||
431 | printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); | ||
432 | printf("--------------------------------------------------\n"); | ||
433 | printf("Fastpath %8lu %8lu %3lu %3lu\n", | ||
434 | s->alloc_fastpath, s->free_fastpath, | ||
435 | s->alloc_fastpath * 100 / total_alloc, | ||
436 | s->free_fastpath * 100 / total_free); | ||
437 | printf("Slowpath %8lu %8lu %3lu %3lu\n", | ||
438 | total_alloc - s->alloc_fastpath, s->free_slowpath, | ||
439 | (total_alloc - s->alloc_fastpath) * 100 / total_alloc, | ||
440 | s->free_slowpath * 100 / total_free); | ||
441 | printf("Page Alloc %8lu %8lu %3lu %3lu\n", | ||
442 | s->alloc_slab, s->free_slab, | ||
443 | s->alloc_slab * 100 / total_alloc, | ||
444 | s->free_slab * 100 / total_free); | ||
445 | printf("Add partial %8lu %8lu %3lu %3lu\n", | ||
446 | s->deactivate_to_head + s->deactivate_to_tail, | ||
447 | s->free_add_partial, | ||
448 | (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, | ||
449 | s->free_add_partial * 100 / total_free); | ||
450 | printf("Remove partial %8lu %8lu %3lu %3lu\n", | ||
451 | s->alloc_from_partial, s->free_remove_partial, | ||
452 | s->alloc_from_partial * 100 / total_alloc, | ||
453 | s->free_remove_partial * 100 / total_free); | ||
454 | |||
455 | printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", | ||
456 | s->deactivate_remote_frees, s->free_frozen, | ||
457 | s->deactivate_remote_frees * 100 / total_alloc, | ||
458 | s->free_frozen * 100 / total_free); | ||
459 | |||
460 | printf("Total %8lu %8lu\n\n", total_alloc, total_free); | ||
461 | |||
462 | if (s->cpuslab_flush) | ||
463 | printf("Flushes %8lu\n", s->cpuslab_flush); | ||
464 | |||
465 | if (s->alloc_refill) | ||
466 | printf("Refill %8lu\n", s->alloc_refill); | ||
467 | |||
468 | total = s->deactivate_full + s->deactivate_empty + | ||
469 | s->deactivate_to_head + s->deactivate_to_tail; | ||
470 | |||
471 | if (total) | ||
472 | printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) " | ||
473 | "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n", | ||
474 | s->deactivate_full, (s->deactivate_full * 100) / total, | ||
475 | s->deactivate_empty, (s->deactivate_empty * 100) / total, | ||
476 | s->deactivate_to_head, (s->deactivate_to_head * 100) / total, | ||
477 | s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); | ||
478 | } | ||
479 | |||
395 | void report(struct slabinfo *s) | 480 | void report(struct slabinfo *s) |
396 | { | 481 | { |
397 | if (strcmp(s->name, "*") == 0) | 482 | if (strcmp(s->name, "*") == 0) |
@@ -430,6 +515,7 @@ void report(struct slabinfo *s) | |||
430 | ops(s); | 515 | ops(s); |
431 | show_tracking(s); | 516 | show_tracking(s); |
432 | slab_numa(s, 1); | 517 | slab_numa(s, 1); |
518 | slab_stats(s); | ||
433 | } | 519 | } |
434 | 520 | ||
435 | void slabcache(struct slabinfo *s) | 521 | void slabcache(struct slabinfo *s) |
@@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s) | |||
479 | *p++ = 'T'; | 565 | *p++ = 'T'; |
480 | 566 | ||
481 | *p = 0; | 567 | *p = 0; |
482 | printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", | 568 | if (show_activity) { |
483 | s->name, s->objects, s->object_size, size_str, dist_str, | 569 | unsigned long total_alloc; |
484 | s->objs_per_slab, s->order, | 570 | unsigned long total_free; |
485 | s->slabs ? (s->partial * 100) / s->slabs : 100, | 571 | |
486 | s->slabs ? (s->objects * s->object_size * 100) / | 572 | total_alloc = s->alloc_fastpath + s->alloc_slowpath; |
487 | (s->slabs * (page_size << s->order)) : 100, | 573 | total_free = s->free_fastpath + s->free_slowpath; |
488 | flags); | 574 | |
575 | printf("%-21s %8ld %8ld %8ld %3ld %3ld \n", | ||
576 | s->name, s->objects, | ||
577 | total_alloc, total_free, | ||
578 | total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, | ||
579 | total_free ? (s->free_fastpath * 100 / total_free) : 0); | ||
580 | } | ||
581 | else | ||
582 | printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", | ||
583 | s->name, s->objects, s->object_size, size_str, dist_str, | ||
584 | s->objs_per_slab, s->order, | ||
585 | s->slabs ? (s->partial * 100) / s->slabs : 100, | ||
586 | s->slabs ? (s->objects * s->object_size * 100) / | ||
587 | (s->slabs * (page_size << s->order)) : 100, | ||
588 | flags); | ||
489 | } | 589 | } |
490 | 590 | ||
491 | /* | 591 | /* |
@@ -892,6 +992,8 @@ void sort_slabs(void) | |||
892 | 992 | ||
893 | if (sort_size) | 993 | if (sort_size) |
894 | result = slab_size(s1) < slab_size(s2); | 994 | result = slab_size(s1) < slab_size(s2); |
995 | else if (sort_active) | ||
996 | result = slab_activity(s1) < slab_activity(s2); | ||
895 | else | 997 | else |
896 | result = strcasecmp(s1->name, s2->name); | 998 | result = strcasecmp(s1->name, s2->name); |
897 | 999 | ||
@@ -1074,6 +1176,23 @@ void read_slab_dir(void) | |||
1074 | free(t); | 1176 | free(t); |
1075 | slab->store_user = get_obj("store_user"); | 1177 | slab->store_user = get_obj("store_user"); |
1076 | slab->trace = get_obj("trace"); | 1178 | slab->trace = get_obj("trace"); |
1179 | slab->alloc_fastpath = get_obj("alloc_fastpath"); | ||
1180 | slab->alloc_slowpath = get_obj("alloc_slowpath"); | ||
1181 | slab->free_fastpath = get_obj("free_fastpath"); | ||
1182 | slab->free_slowpath = get_obj("free_slowpath"); | ||
1183 | slab->free_frozen= get_obj("free_frozen"); | ||
1184 | slab->free_add_partial = get_obj("free_add_partial"); | ||
1185 | slab->free_remove_partial = get_obj("free_remove_partial"); | ||
1186 | slab->alloc_from_partial = get_obj("alloc_from_partial"); | ||
1187 | slab->alloc_slab = get_obj("alloc_slab"); | ||
1188 | slab->alloc_refill = get_obj("alloc_refill"); | ||
1189 | slab->free_slab = get_obj("free_slab"); | ||
1190 | slab->cpuslab_flush = get_obj("cpuslab_flush"); | ||
1191 | slab->deactivate_full = get_obj("deactivate_full"); | ||
1192 | slab->deactivate_empty = get_obj("deactivate_empty"); | ||
1193 | slab->deactivate_to_head = get_obj("deactivate_to_head"); | ||
1194 | slab->deactivate_to_tail = get_obj("deactivate_to_tail"); | ||
1195 | slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); | ||
1077 | chdir(".."); | 1196 | chdir(".."); |
1078 | if (slab->name[0] == ':') | 1197 | if (slab->name[0] == ':') |
1079 | alias_targets++; | 1198 | alias_targets++; |
@@ -1124,7 +1243,9 @@ void output_slabs(void) | |||
1124 | 1243 | ||
1125 | struct option opts[] = { | 1244 | struct option opts[] = { |
1126 | { "aliases", 0, NULL, 'a' }, | 1245 | { "aliases", 0, NULL, 'a' }, |
1246 | { "activity", 0, NULL, 'A' }, | ||
1127 | { "debug", 2, NULL, 'd' }, | 1247 | { "debug", 2, NULL, 'd' }, |
1248 | { "display-activity", 0, NULL, 'D' }, | ||
1128 | { "empty", 0, NULL, 'e' }, | 1249 | { "empty", 0, NULL, 'e' }, |
1129 | { "first-alias", 0, NULL, 'f' }, | 1250 | { "first-alias", 0, NULL, 'f' }, |
1130 | { "help", 0, NULL, 'h' }, | 1251 | { "help", 0, NULL, 'h' }, |
@@ -1149,7 +1270,7 @@ int main(int argc, char *argv[]) | |||
1149 | 1270 | ||
1150 | page_size = getpagesize(); | 1271 | page_size = getpagesize(); |
1151 | 1272 | ||
1152 | while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS", | 1273 | while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS", |
1153 | opts, NULL)) != -1) | 1274 | opts, NULL)) != -1) |
1154 | switch (c) { | 1275 | switch (c) { |
1155 | case '1': | 1276 | case '1': |
@@ -1158,11 +1279,17 @@ int main(int argc, char *argv[]) | |||
1158 | case 'a': | 1279 | case 'a': |
1159 | show_alias = 1; | 1280 | show_alias = 1; |
1160 | break; | 1281 | break; |
1282 | case 'A': | ||
1283 | sort_active = 1; | ||
1284 | break; | ||
1161 | case 'd': | 1285 | case 'd': |
1162 | set_debug = 1; | 1286 | set_debug = 1; |
1163 | if (!debug_opt_scan(optarg)) | 1287 | if (!debug_opt_scan(optarg)) |
1164 | fatal("Invalid debug option '%s'\n", optarg); | 1288 | fatal("Invalid debug option '%s'\n", optarg); |
1165 | break; | 1289 | break; |
1290 | case 'D': | ||
1291 | show_activity = 1; | ||
1292 | break; | ||
1166 | case 'e': | 1293 | case 'e': |
1167 | show_empty = 1; | 1294 | show_empty = 1; |
1168 | break; | 1295 | break; |