author     Christoph Lameter <clameter@sgi.com>                2008-02-07 20:47:41 -0500
committer  Christoph Lameter <christoph@stapp.engr.sgi.com>   2008-02-07 20:47:41 -0500
commit     8ff12cfc009a2a38d87fa7058226fe197bb2696f (patch)
tree       1358ed247d3c897d8790342a978dd5078354a207
parent     1f84260c8ce3b1ce26d4c1d6dedc2f33a3a29c0c (diff)
SLUB: Support for performance statistics
The statistics provided here allow the monitoring of allocator behavior
but at the cost of some (minimal) loss of performance. Counters are
placed in SLUB's per cpu data structure. The per cpu structure may be
extended by the statistics to grow larger than one cacheline, which will
increase the cache footprint of SLUB.

There is a compile option to enable/disable the inclusion of the
runtime statistics; it is off by default.

The slabinfo tool is enhanced to support these statistics via two new
options:

-D      Switches the line of information displayed for a slab from size
        mode to activity mode.

-A      Sorts the slabs displayed by activity. This allows the display
        of the slabs most important to the performance of a certain
        load.

-r      Report option will report detailed statistics on the selected
        slabs.

Example (tbench load):

slabinfo -AD            ->Shows the most active slabs

Name                   Objects      Alloc       Free   %Fast
skbuff_fclone_cache         33  111953835  111953835  99  99
:0000192                  2666    5283688    5281047  99  99
:0001024                   849    5247230    5246389  83  83
vm_area_struct            1349     119642     118355  91  22
:0004096                    15      66753      66751  98  98
:0000064                  2067      25297      23383  98  78
dentry                   10259      28635      18464  91  45
:0000080                 11004      18950       8089  98  98
:0000096                  1703      12358      10784  99  98
:0000128                   762      10582       9875  94  18
:0000512                   184       9807       9647  95  81
:0002048                   479       9669       9195  83  65
anon_vma                   777       9461       9002  99  71
kmalloc-8                 6492       9981       5624  99  97
:0000768                   258       7174       6931  58  15

So the skbuff_fclone_cache is of highest importance for the tbench
load. There is also pretty high load on the 192-byte slab. Look for
the aliases:

slabinfo -a | grep 000192
:0000192 <- xfs_btree_cur filp kmalloc-192 uid_cache tw_sock_TCP
        request_sock_TCPv6 tw_sock_TCPv6 skbuff_head_cache xfs_ili

Likely skbuff_head_cache.

Looking into the statistics of the skbuff_fclone_cache is possible
through:

slabinfo skbuff_fclone_cache    ->-r option implied if cache name is mentioned

.... Usual output ...

Slab Perf Counter       Alloc     Free %Al %Fr
--------------------------------------------------
Fastpath             111953360  111946981  99  99
Slowpath                  1044       7423   0   0
Page Alloc                 272        264   0   0
Add partial                 25        325   0   0
Remove partial              86        264   0   0
RemoteObj/SlabFrozen       350       4832   0   0
Total                111954404  111954404

Flushes       49
Refill         0
Deactivate Full=325(92%) Empty=0(0%) ToHead=24(6%) ToTail=1(0%)

Looks good because the fastpath is overwhelmingly taken.

skbuff_head_cache:

Slab Perf Counter       Alloc     Free %Al %Fr
--------------------------------------------------
Fastpath               5297262    5259882  99  99
Slowpath                  4477      39586   0   0
Page Alloc                 937        824   0   0
Add partial                  0       2515   0   0
Remove partial            1691        824   0   0
RemoteObj/SlabFrozen      2621       9684   0   0
Total                  5301739    5299468

Deactivate Full=2620(100%) Empty=0(0%) ToHead=0(0%) ToTail=0(0%)

Descriptions of the output:

Total:          The total number of allocations and frees that occurred
                for a slab.

Fastpath:       The number of allocations/frees that used the fastpath.

Slowpath:       Other allocations.

Page Alloc:     Number of calls to the page allocator as a result of
                slowpath processing.

Add Partial:    Number of slabs added to the partial list through free
                or alloc (occurs during cpuslab flushes).

Remove Partial: Number of slabs removed from the partial list as a
                result of allocations retrieving a partial slab or by a
                free freeing the last object of a slab.

RemoteObj/Froz: How many times remotely freed objects were encountered
                when a slab was about to be deactivated. Frozen: how
                many times was a free able to skip list processing
                because the slab was in use as the cpuslab of another
                processor.

Flushes:        Number of times the cpuslab was flushed on request
                (kmem_cache_shrink; may result from races in
                __slab_alloc).

Refill:         Number of times we were able to refill the cpuslab from
                remotely freed objects for the same slab.

Deactivate:     Statistics on how slabs were deactivated; shows how
                they were put onto the partial list.

In general the fastpath is very good. Slowpath without partial list
processing is also desirable. Any touching of the partial list uses
node specific locks, which may potentially cause list lock contention.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
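Beyond the slabinfo tool, each counter is exported as its own read-only
file under the cache's sysfs directory (created by the STAT_ATTR()
attributes added to mm/slub.c below). A minimal sketch of reading the
raw values directly, assuming a kernel built with CONFIG_SLUB_STATS;
the cache name here is only an example:

        cd /sys/kernel/slab/skbuff_head_cache
        # Each stat_item becomes one file; print the four fast/slow counters.
        for f in alloc_fastpath alloc_slowpath free_fastpath free_slowpath; do
                echo "$f: $(cat $f)"
        done

The %Fast columns above are derived from exactly these files:
alloc_fastpath * 100 / (alloc_fastpath + alloc_slowpath) on the
allocation side, and the same ratio for frees.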
-rw-r--r--   Documentation/vm/slabinfo.c   149
-rw-r--r--   include/linux/slub_def.h       23
-rw-r--r--   lib/Kconfig.debug              13
-rw-r--r--   mm/slub.c                     127
4 files changed, 293 insertions, 19 deletions
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index 488c1f31b992..7123fee708ca 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -32,6 +32,13 @@ struct slabinfo {
 	int sanity_checks, slab_size, store_user, trace;
 	int order, poison, reclaim_account, red_zone;
 	unsigned long partial, objects, slabs;
+	unsigned long alloc_fastpath, alloc_slowpath;
+	unsigned long free_fastpath, free_slowpath;
+	unsigned long free_frozen, free_add_partial, free_remove_partial;
+	unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
+	unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
+	unsigned long deactivate_to_head, deactivate_to_tail;
+	unsigned long deactivate_remote_frees;
 	int numa[MAX_NODES];
 	int numa_partial[MAX_NODES];
 } slabinfo[MAX_SLABS];
@@ -64,8 +71,10 @@ int show_inverted = 0;
 int show_single_ref = 0;
 int show_totals = 0;
 int sort_size = 0;
+int sort_active = 0;
 int set_debug = 0;
 int show_ops = 0;
+int show_activity = 0;
 
 /* Debug options */
 int sanity = 0;
@@ -93,8 +102,10 @@ void usage(void)
 	printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n"
 		"slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
 		"-a|--aliases           Show aliases\n"
+		"-A|--activity          Most active slabs first\n"
 		"-d<options>|--debug=<options> Set/Clear Debug options\n"
+		"-D|--display-active    Switch line format to activity\n"
 		"-e|--empty             Show empty slabs\n"
 		"-f|--first-alias       Show first alias\n"
 		"-h|--help              Show usage information\n"
 		"-i|--inverted          Inverted list\n"
@@ -281,8 +292,11 @@ int line = 0;
 
 void first_line(void)
 {
-	printf("Name                   Objects Objsize    Space "
-		"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
+	if (show_activity)
+		printf("Name                   Objects      Alloc       Free   %%Fast\n");
+	else
+		printf("Name                   Objects Objsize    Space "
+			"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
 }
 
 /*
@@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s)
 	return s->slabs * (page_size << s->order);
 }
 
+unsigned long slab_activity(struct slabinfo *s)
+{
+	return s->alloc_fastpath + s->free_fastpath +
+		s->alloc_slowpath + s->free_slowpath;
+}
+
 void slab_numa(struct slabinfo *s, int mode)
 {
 	int node;
@@ -392,6 +412,71 @@ const char *onoff(int x)
 	return "Off";
 }
 
+void slab_stats(struct slabinfo *s)
+{
+	unsigned long total_alloc;
+	unsigned long total_free;
+	unsigned long total;
+
+	if (!s->alloc_slab)
+		return;
+
+	total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+	total_free = s->free_fastpath + s->free_slowpath;
+
+	if (!total_alloc)
+		return;
+
+	printf("\n");
+	printf("Slab Perf Counter       Alloc     Free %%Al %%Fr\n");
+	printf("--------------------------------------------------\n");
+	printf("Fastpath             %8lu %8lu %3lu %3lu\n",
+		s->alloc_fastpath, s->free_fastpath,
+		s->alloc_fastpath * 100 / total_alloc,
+		s->free_fastpath * 100 / total_free);
+	printf("Slowpath             %8lu %8lu %3lu %3lu\n",
+		total_alloc - s->alloc_fastpath, s->free_slowpath,
+		(total_alloc - s->alloc_fastpath) * 100 / total_alloc,
+		s->free_slowpath * 100 / total_free);
+	printf("Page Alloc           %8lu %8lu %3lu %3lu\n",
+		s->alloc_slab, s->free_slab,
+		s->alloc_slab * 100 / total_alloc,
+		s->free_slab * 100 / total_free);
+	printf("Add partial          %8lu %8lu %3lu %3lu\n",
+		s->deactivate_to_head + s->deactivate_to_tail,
+		s->free_add_partial,
+		(s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
+		s->free_add_partial * 100 / total_free);
+	printf("Remove partial       %8lu %8lu %3lu %3lu\n",
+		s->alloc_from_partial, s->free_remove_partial,
+		s->alloc_from_partial * 100 / total_alloc,
+		s->free_remove_partial * 100 / total_free);
+
+	printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
+		s->deactivate_remote_frees, s->free_frozen,
+		s->deactivate_remote_frees * 100 / total_alloc,
+		s->free_frozen * 100 / total_free);
+
+	printf("Total                %8lu %8lu\n\n", total_alloc, total_free);
+
+	if (s->cpuslab_flush)
+		printf("Flushes %8lu\n", s->cpuslab_flush);
+
+	if (s->alloc_refill)
+		printf("Refill %8lu\n", s->alloc_refill);
+
+	total = s->deactivate_full + s->deactivate_empty +
+			s->deactivate_to_head + s->deactivate_to_tail;
+
+	if (total)
+		printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
+			"ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
+			s->deactivate_full, (s->deactivate_full * 100) / total,
+			s->deactivate_empty, (s->deactivate_empty * 100) / total,
+			s->deactivate_to_head, (s->deactivate_to_head * 100) / total,
+			s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
+}
+
 void report(struct slabinfo *s)
 {
 	if (strcmp(s->name, "*") == 0)
@@ -430,6 +515,7 @@ void report(struct slabinfo *s)
 	ops(s);
 	show_tracking(s);
 	slab_numa(s, 1);
+	slab_stats(s);
 }
 
 void slabcache(struct slabinfo *s)
@@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s)
 		*p++ = 'T';
 
 	*p = 0;
-	printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
-		s->name, s->objects, s->object_size, size_str, dist_str,
-		s->objs_per_slab, s->order,
-		s->slabs ? (s->partial * 100) / s->slabs : 100,
-		s->slabs ? (s->objects * s->object_size * 100) /
-			(s->slabs * (page_size << s->order)) : 100,
-		flags);
+	if (show_activity) {
+		unsigned long total_alloc;
+		unsigned long total_free;
+
+		total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+		total_free = s->free_fastpath + s->free_slowpath;
+
+		printf("%-21s %8ld %8ld %8ld %3ld %3ld \n",
+			s->name, s->objects,
+			total_alloc, total_free,
+			total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
+			total_free ? (s->free_fastpath * 100 / total_free) : 0);
+	}
+	else
+		printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
+			s->name, s->objects, s->object_size, size_str, dist_str,
+			s->objs_per_slab, s->order,
+			s->slabs ? (s->partial * 100) / s->slabs : 100,
+			s->slabs ? (s->objects * s->object_size * 100) /
+				(s->slabs * (page_size << s->order)) : 100,
+			flags);
 }
 
 /*
@@ -892,6 +992,8 @@ void sort_slabs(void)
 
 	if (sort_size)
 		result = slab_size(s1) < slab_size(s2);
+	else if (sort_active)
+		result = slab_activity(s1) < slab_activity(s2);
 	else
 		result = strcasecmp(s1->name, s2->name);
 
@@ -1074,6 +1176,23 @@ void read_slab_dir(void)
 		free(t);
 		slab->store_user = get_obj("store_user");
 		slab->trace = get_obj("trace");
+		slab->alloc_fastpath = get_obj("alloc_fastpath");
+		slab->alloc_slowpath = get_obj("alloc_slowpath");
+		slab->free_fastpath = get_obj("free_fastpath");
+		slab->free_slowpath = get_obj("free_slowpath");
+		slab->free_frozen = get_obj("free_frozen");
+		slab->free_add_partial = get_obj("free_add_partial");
+		slab->free_remove_partial = get_obj("free_remove_partial");
+		slab->alloc_from_partial = get_obj("alloc_from_partial");
+		slab->alloc_slab = get_obj("alloc_slab");
+		slab->alloc_refill = get_obj("alloc_refill");
+		slab->free_slab = get_obj("free_slab");
+		slab->cpuslab_flush = get_obj("cpuslab_flush");
+		slab->deactivate_full = get_obj("deactivate_full");
+		slab->deactivate_empty = get_obj("deactivate_empty");
+		slab->deactivate_to_head = get_obj("deactivate_to_head");
+		slab->deactivate_to_tail = get_obj("deactivate_to_tail");
+		slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
 		chdir("..");
 		if (slab->name[0] == ':')
 			alias_targets++;
@@ -1124,7 +1243,9 @@ void output_slabs(void)
 
 struct option opts[] = {
 	{ "aliases", 0, NULL, 'a' },
+	{ "activity", 0, NULL, 'A' },
 	{ "debug", 2, NULL, 'd' },
+	{ "display-activity", 0, NULL, 'D' },
 	{ "empty", 0, NULL, 'e' },
 	{ "first-alias", 0, NULL, 'f' },
 	{ "help", 0, NULL, 'h' },
@@ -1149,7 +1270,7 @@ int main(int argc, char *argv[])
 
 	page_size = getpagesize();
 
-	while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS",
+	while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS",
 				opts, NULL)) != -1)
 		switch (c) {
 		case '1':
@@ -1158,11 +1279,17 @@ int main(int argc, char *argv[])
 		case 'a':
 			show_alias = 1;
 			break;
+		case 'A':
+			sort_active = 1;
+			break;
 		case 'd':
 			set_debug = 1;
 			if (!debug_opt_scan(optarg))
 				fatal("Invalid debug option '%s'\n", optarg);
 			break;
+		case 'D':
+			show_activity = 1;
+			break;
 		case 'e':
 			show_empty = 1;
 			break;
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ddb1a706b144..5e6d3d634d5b 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,12 +11,35 @@
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
 
+enum stat_item {
+	ALLOC_FASTPATH,		/* Allocation from cpu slab */
+	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
+	FREE_FASTPATH,		/* Free to cpu slab */
+	FREE_SLOWPATH,		/* Freeing not to cpu slab */
+	FREE_FROZEN,		/* Freeing to frozen slab */
+	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
+	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
+	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from partial list */
+	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
+	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
+	FREE_SLAB,		/* Slab freed to the page allocator */
+	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
+	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
+	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
+	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
+	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
+	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
+	NR_SLUB_STAT_ITEMS };
+
 struct kmem_cache_cpu {
 	void **freelist;	/* Pointer to first free per cpu object */
 	struct page *page;	/* The slab from which we are allocating */
 	int node;		/* The node of the page (or -1 for debug) */
 	unsigned int offset;	/* Freepointer offset (in word units) */
 	unsigned int objsize;	/* Size of an object (from kmem_cache) */
+#ifdef CONFIG_SLUB_STATS
+	unsigned stat[NR_SLUB_STAT_ITEMS];
+#endif
 };
 
 struct kmem_cache_node {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0d385be682db..4f4008fc73e4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -205,6 +205,19 @@ config SLUB_DEBUG_ON
 	  off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
 	  "slub_debug=-".
 
+config SLUB_STATS
+	default n
+	bool "Enable SLUB performance statistics"
+	depends on SLUB
+	help
+	  SLUB statistics are useful to debug SLUB's allocation behavior
+	  in order to find ways to optimize the allocator. This should
+	  never be enabled for production use since keeping statistics
+	  slows down the allocator by a few percentage points. The
+	  slabinfo command supports the determination of the most active
+	  slabs to figure out which slabs are relevant to a particular
+	  load. Try running: slabinfo -DA
+
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
 	depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64)
diff --git a/mm/slub.c b/mm/slub.c
index 20ab8f0a4eb9..ac836d31e3be 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -250,6 +250,7 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
+
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
@@ -258,8 +259,16 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
 {
 	kfree(s);
 }
+
 #endif
 
+static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
+{
+#ifdef CONFIG_SLUB_STATS
+	c->stat[si]++;
+#endif
+}
+
 /********************************************************************
  *			Core slab cache functions
  *******************************************************************/
@@ -1364,17 +1373,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
 
 	ClearSlabFrozen(page);
 	if (page->inuse) {
 
-		if (page->freelist != page->end)
+		if (page->freelist != page->end) {
 			add_partial(n, page, tail);
-		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
-			add_full(n, page);
+			stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+		} else {
+			stat(c, DEACTIVATE_FULL);
+			if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
+				add_full(n, page);
+		}
 		slab_unlock(page);
-
 	} else {
+		stat(c, DEACTIVATE_EMPTY);
 		if (n->nr_partial < MIN_PARTIAL) {
 			/*
 			 * Adding an empty slab to the partial slabs in order
@@ -1388,6 +1402,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			slab_unlock(page);
 		} else {
 			slab_unlock(page);
+			stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
 			discard_slab(s, page);
 		}
 	}
@@ -1400,6 +1415,9 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
 	struct page *page = c->page;
 	int tail = 1;
+
+	if (c->freelist)
+		stat(c, DEACTIVATE_REMOTE_FREES);
 	/*
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
@@ -1429,6 +1447,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
+	stat(c, CPUSLAB_FLUSH);
 	slab_lock(c->page);
 	deactivate_slab(s, c);
 }
@@ -1511,6 +1530,7 @@ static void *__slab_alloc(struct kmem_cache *s,
 	slab_lock(c->page);
 	if (unlikely(!node_match(c, node)))
 		goto another_slab;
+	stat(c, ALLOC_REFILL);
 load_freelist:
 	object = c->page->freelist;
 	if (unlikely(object == c->page->end))
@@ -1525,6 +1545,7 @@ load_freelist:
 	c->node = page_to_nid(c->page);
 unlock_out:
 	slab_unlock(c->page);
+	stat(c, ALLOC_SLOWPATH);
 out:
 #ifdef SLUB_FASTPATH
 	local_irq_restore(flags);
@@ -1538,6 +1559,7 @@ new_slab:
 	new = get_partial(s, gfpflags, node);
 	if (new) {
 		c->page = new;
+		stat(c, ALLOC_FROM_PARTIAL);
 		goto load_freelist;
 	}
 
@@ -1551,6 +1573,7 @@ new_slab:
 
 	if (new) {
 		c = get_cpu_slab(s, smp_processor_id());
+		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
 		slab_lock(new);
@@ -1610,6 +1633,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 			object = __slab_alloc(s, gfpflags, node, addr, c);
 			break;
 		}
+		stat(c, ALLOC_FASTPATH);
 	} while (cmpxchg_local(&c->freelist, object, object[c->offset])
 							!= object);
 #else
@@ -1624,6 +1648,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	else {
 		object = c->freelist;
 		c->freelist = object[c->offset];
+		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
 #endif
@@ -1661,12 +1686,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
 	void *prior;
 	void **object = (void *)x;
+	struct kmem_cache_cpu *c;
 
 #ifdef SLUB_FASTPATH
 	unsigned long flags;
 
 	local_irq_save(flags);
 #endif
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
 
 	if (unlikely(SlabDebug(page)))
@@ -1676,8 +1704,10 @@ checks_ok:
 	page->freelist = object;
 	page->inuse--;
 
-	if (unlikely(SlabFrozen(page)))
+	if (unlikely(SlabFrozen(page))) {
+		stat(c, FREE_FROZEN);
 		goto out_unlock;
+	}
 
 	if (unlikely(!page->inuse))
 		goto slab_empty;
@@ -1687,8 +1717,10 @@ checks_ok:
 	 * was not on the partial list before
 	 * then add it.
 	 */
-	if (unlikely(prior == page->end))
+	if (unlikely(prior == page->end)) {
 		add_partial(get_node(s, page_to_nid(page)), page, 1);
+		stat(c, FREE_ADD_PARTIAL);
+	}
 
 out_unlock:
 	slab_unlock(page);
@@ -1698,13 +1730,15 @@ out_unlock:
 	return;
 
 slab_empty:
-	if (prior != page->end)
+	if (prior != page->end) {
 		/*
 		 * Slab still on the partial list.
 		 */
 		remove_partial(s, page);
-
+		stat(c, FREE_REMOVE_PARTIAL);
+	}
 	slab_unlock(page);
+	stat(c, FREE_SLAB);
 #ifdef SLUB_FASTPATH
 	local_irq_restore(flags);
 #endif
@@ -1758,6 +1792,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 			break;
 		}
 		object[c->offset] = freelist;
+		stat(c, FREE_FASTPATH);
 	} while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
 #else
 	unsigned long flags;
@@ -1768,6 +1803,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 	if (likely(page == c->page && c->node >= 0)) {
 		object[c->offset] = c->freelist;
 		c->freelist = object;
+		stat(c, FREE_FASTPATH);
 	} else
 		__slab_free(s, page, x, addr, c->offset);
 
@@ -3980,6 +4016,62 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 SLAB_ATTR(remote_node_defrag_ratio);
 #endif
 
+#ifdef CONFIG_SLUB_STATS
+
+static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
+{
+	unsigned long sum = 0;
+	int cpu;
+	int len;
+	int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
+
+	if (!data)
+		return -ENOMEM;
+
+	for_each_online_cpu(cpu) {
+		unsigned x = get_cpu_slab(s, cpu)->stat[si];
+
+		data[cpu] = x;
+		sum += x;
+	}
+
+	len = sprintf(buf, "%lu", sum);
+
+	for_each_online_cpu(cpu) {
+		if (data[cpu] && len < PAGE_SIZE - 20)
+			len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
+	}
+	kfree(data);
+	return len + sprintf(buf + len, "\n");
+}
+
+#define STAT_ATTR(si, text)					\
+static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
+{								\
+	return show_stat(s, buf, si);				\
+}								\
+SLAB_ATTR_RO(text);						\
+
+STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
+STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
+STAT_ATTR(FREE_FASTPATH, free_fastpath);
+STAT_ATTR(FREE_SLOWPATH, free_slowpath);
+STAT_ATTR(FREE_FROZEN, free_frozen);
+STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
+STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
+STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
+STAT_ATTR(ALLOC_SLAB, alloc_slab);
+STAT_ATTR(ALLOC_REFILL, alloc_refill);
+STAT_ATTR(FREE_SLAB, free_slab);
+STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
+STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
+STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
+STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
+STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
+STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
+
+#endif
+
 static struct attribute *slab_attrs[] = {
 	&slab_size_attr.attr,
 	&object_size_attr.attr,
@@ -4010,6 +4102,25 @@ static struct attribute *slab_attrs[] = {
 #ifdef CONFIG_NUMA
 	&remote_node_defrag_ratio_attr.attr,
 #endif
+#ifdef CONFIG_SLUB_STATS
+	&alloc_fastpath_attr.attr,
+	&alloc_slowpath_attr.attr,
+	&free_fastpath_attr.attr,
+	&free_slowpath_attr.attr,
+	&free_frozen_attr.attr,
+	&free_add_partial_attr.attr,
+	&free_remove_partial_attr.attr,
+	&alloc_from_partial_attr.attr,
+	&alloc_slab_attr.attr,
+	&alloc_refill_attr.attr,
+	&free_slab_attr.attr,
+	&cpuslab_flush_attr.attr,
+	&deactivate_full_attr.attr,
+	&deactivate_empty_attr.attr,
+	&deactivate_to_head_attr.attr,
+	&deactivate_to_tail_attr.attr,
+	&deactivate_remote_frees_attr.attr,
+#endif
 	NULL
 };
 
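For reference, show_stat() above prints the summed counter first and
then the non-zero per-cpu contributions, so a raw read of one of the
new sysfs files looks roughly like this (hypothetical four-CPU
numbers):

        # cat /sys/kernel/slab/skbuff_head_cache/free_slowpath
        39586 c0=9903 c1=9890 c2=9901 c3=9892

The per-cpu breakdown helps spot patterns such as objects allocated on
one processor and freed on another, which is the kind of behavior the
FREE_SLOWPATH and DEACTIVATE_REMOTE_FREES counters tend to reflect.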