aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-07 21:22:29 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-07 21:22:29 -0500
commit c00f08d705e149fbfaf7a252b4d4fbb7affdcc96 (patch)
tree 8c916856376d0d400ddda239d5be386f9b9516d7
parent c8b6de16d9434405e5832b8772e4f986ddd5118e (diff)
parent 3adbefee6fd58a061b2bf1df4f3769701860fc62 (diff)
Merge branch 'slub-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/christoph/vm
* 'slub-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/christoph/vm:
  SLUB: fix checkpatch warnings
  Use non atomic unlock
  SLUB: Support for performance statistics
  SLUB: Alternate fast paths using cmpxchg_local
  SLUB: Use unique end pointer for each slab page.
  SLUB: Deal with annoying gcc warning on kfree()
-rw-r--r-- Documentation/vm/slabinfo.c 149
-rw-r--r-- arch/x86/Kconfig 4
-rw-r--r-- include/linux/mm_types.h 5
-rw-r--r-- include/linux/slub_def.h 23
-rw-r--r-- lib/Kconfig.debug 13
-rw-r--r-- mm/slub.c 326
6 files changed, 457 insertions, 63 deletions
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index 488c1f31b992..7123fee708ca 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -32,6 +32,13 @@ struct slabinfo {
32 int sanity_checks, slab_size, store_user, trace; 32 int sanity_checks, slab_size, store_user, trace;
33 int order, poison, reclaim_account, red_zone; 33 int order, poison, reclaim_account, red_zone;
34 unsigned long partial, objects, slabs; 34 unsigned long partial, objects, slabs;
35 unsigned long alloc_fastpath, alloc_slowpath;
36 unsigned long free_fastpath, free_slowpath;
37 unsigned long free_frozen, free_add_partial, free_remove_partial;
38 unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
39 unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
40 unsigned long deactivate_to_head, deactivate_to_tail;
41 unsigned long deactivate_remote_frees;
35 int numa[MAX_NODES]; 42 int numa[MAX_NODES];
36 int numa_partial[MAX_NODES]; 43 int numa_partial[MAX_NODES];
37} slabinfo[MAX_SLABS]; 44} slabinfo[MAX_SLABS];
@@ -64,8 +71,10 @@ int show_inverted = 0;
64int show_single_ref = 0; 71int show_single_ref = 0;
65int show_totals = 0; 72int show_totals = 0;
66int sort_size = 0; 73int sort_size = 0;
74int sort_active = 0;
67int set_debug = 0; 75int set_debug = 0;
68int show_ops = 0; 76int show_ops = 0;
77int show_activity = 0;
69 78
70/* Debug options */ 79/* Debug options */
71int sanity = 0; 80int sanity = 0;
@@ -93,8 +102,10 @@ void usage(void)
93 printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" 102 printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n"
94 "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" 103 "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
95 "-a|--aliases Show aliases\n" 104 "-a|--aliases Show aliases\n"
105 "-A|--activity Most active slabs first\n"
96 "-d<options>|--debug=<options> Set/Clear Debug options\n" 106 "-d<options>|--debug=<options> Set/Clear Debug options\n"
97 "-e|--empty Show empty slabs\n" 107 "-D|--display-active Switch line format to activity\n"
108 "-e|--empty Show empty slabs\n"
98 "-f|--first-alias Show first alias\n" 109 "-f|--first-alias Show first alias\n"
99 "-h|--help Show usage information\n" 110 "-h|--help Show usage information\n"
100 "-i|--inverted Inverted list\n" 111 "-i|--inverted Inverted list\n"
@@ -281,8 +292,11 @@ int line = 0;
281 292
282void first_line(void) 293void first_line(void)
283{ 294{
284 printf("Name Objects Objsize Space " 295 if (show_activity)
285 "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); 296 printf("Name Objects Alloc Free %%Fast\n");
297 else
298 printf("Name Objects Objsize Space "
299 "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
286} 300}
287 301
288/* 302/*
@@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s)
309 return s->slabs * (page_size << s->order); 323 return s->slabs * (page_size << s->order);
310} 324}
311 325
326unsigned long slab_activity(struct slabinfo *s)
327{
328 return s->alloc_fastpath + s->free_fastpath +
329 s->alloc_slowpath + s->free_slowpath;
330}
331
312void slab_numa(struct slabinfo *s, int mode) 332void slab_numa(struct slabinfo *s, int mode)
313{ 333{
314 int node; 334 int node;
@@ -392,6 +412,71 @@ const char *onoff(int x)
392 return "Off"; 412 return "Off";
393} 413}
394 414
415void slab_stats(struct slabinfo *s)
416{
417 unsigned long total_alloc;
418 unsigned long total_free;
419 unsigned long total;
420
421 if (!s->alloc_slab)
422 return;
423
424 total_alloc = s->alloc_fastpath + s->alloc_slowpath;
425 total_free = s->free_fastpath + s->free_slowpath;
426
427 if (!total_alloc)
428 return;
429
430 printf("\n");
431 printf("Slab Perf Counter Alloc Free %%Al %%Fr\n");
432 printf("--------------------------------------------------\n");
433 printf("Fastpath %8lu %8lu %3lu %3lu\n",
434 s->alloc_fastpath, s->free_fastpath,
435 s->alloc_fastpath * 100 / total_alloc,
436 s->free_fastpath * 100 / total_free);
437 printf("Slowpath %8lu %8lu %3lu %3lu\n",
438 total_alloc - s->alloc_fastpath, s->free_slowpath,
439 (total_alloc - s->alloc_fastpath) * 100 / total_alloc,
440 s->free_slowpath * 100 / total_free);
441 printf("Page Alloc %8lu %8lu %3lu %3lu\n",
442 s->alloc_slab, s->free_slab,
443 s->alloc_slab * 100 / total_alloc,
444 s->free_slab * 100 / total_free);
445 printf("Add partial %8lu %8lu %3lu %3lu\n",
446 s->deactivate_to_head + s->deactivate_to_tail,
447 s->free_add_partial,
448 (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
449 s->free_add_partial * 100 / total_free);
450 printf("Remove partial %8lu %8lu %3lu %3lu\n",
451 s->alloc_from_partial, s->free_remove_partial,
452 s->alloc_from_partial * 100 / total_alloc,
453 s->free_remove_partial * 100 / total_free);
454
455 printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
456 s->deactivate_remote_frees, s->free_frozen,
457 s->deactivate_remote_frees * 100 / total_alloc,
458 s->free_frozen * 100 / total_free);
459
460 printf("Total %8lu %8lu\n\n", total_alloc, total_free);
461
462 if (s->cpuslab_flush)
463 printf("Flushes %8lu\n", s->cpuslab_flush);
464
465 if (s->alloc_refill)
466 printf("Refill %8lu\n", s->alloc_refill);
467
468 total = s->deactivate_full + s->deactivate_empty +
469 s->deactivate_to_head + s->deactivate_to_tail;
470
471 if (total)
472 printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
473 "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
474 s->deactivate_full, (s->deactivate_full * 100) / total,
475 s->deactivate_empty, (s->deactivate_empty * 100) / total,
476 s->deactivate_to_head, (s->deactivate_to_head * 100) / total,
477 s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
478}
479
395void report(struct slabinfo *s) 480void report(struct slabinfo *s)
396{ 481{
397 if (strcmp(s->name, "*") == 0) 482 if (strcmp(s->name, "*") == 0)
@@ -430,6 +515,7 @@ void report(struct slabinfo *s)
430 ops(s); 515 ops(s);
431 show_tracking(s); 516 show_tracking(s);
432 slab_numa(s, 1); 517 slab_numa(s, 1);
518 slab_stats(s);
433} 519}
434 520
435void slabcache(struct slabinfo *s) 521void slabcache(struct slabinfo *s)
@@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s)
479 *p++ = 'T'; 565 *p++ = 'T';
480 566
481 *p = 0; 567 *p = 0;
482 printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", 568 if (show_activity) {
483 s->name, s->objects, s->object_size, size_str, dist_str, 569 unsigned long total_alloc;
484 s->objs_per_slab, s->order, 570 unsigned long total_free;
485 s->slabs ? (s->partial * 100) / s->slabs : 100, 571
486 s->slabs ? (s->objects * s->object_size * 100) / 572 total_alloc = s->alloc_fastpath + s->alloc_slowpath;
487 (s->slabs * (page_size << s->order)) : 100, 573 total_free = s->free_fastpath + s->free_slowpath;
488 flags); 574
575 printf("%-21s %8ld %8ld %8ld %3ld %3ld \n",
576 s->name, s->objects,
577 total_alloc, total_free,
578 total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
579 total_free ? (s->free_fastpath * 100 / total_free) : 0);
580 }
581 else
582 printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
583 s->name, s->objects, s->object_size, size_str, dist_str,
584 s->objs_per_slab, s->order,
585 s->slabs ? (s->partial * 100) / s->slabs : 100,
586 s->slabs ? (s->objects * s->object_size * 100) /
587 (s->slabs * (page_size << s->order)) : 100,
588 flags);
489} 589}
490 590
491/* 591/*
@@ -892,6 +992,8 @@ void sort_slabs(void)
892 992
893 if (sort_size) 993 if (sort_size)
894 result = slab_size(s1) < slab_size(s2); 994 result = slab_size(s1) < slab_size(s2);
995 else if (sort_active)
996 result = slab_activity(s1) < slab_activity(s2);
895 else 997 else
896 result = strcasecmp(s1->name, s2->name); 998 result = strcasecmp(s1->name, s2->name);
897 999
@@ -1074,6 +1176,23 @@ void read_slab_dir(void)
1074 free(t); 1176 free(t);
1075 slab->store_user = get_obj("store_user"); 1177 slab->store_user = get_obj("store_user");
1076 slab->trace = get_obj("trace"); 1178 slab->trace = get_obj("trace");
1179 slab->alloc_fastpath = get_obj("alloc_fastpath");
1180 slab->alloc_slowpath = get_obj("alloc_slowpath");
1181 slab->free_fastpath = get_obj("free_fastpath");
1182 slab->free_slowpath = get_obj("free_slowpath");
1183 slab->free_frozen= get_obj("free_frozen");
1184 slab->free_add_partial = get_obj("free_add_partial");
1185 slab->free_remove_partial = get_obj("free_remove_partial");
1186 slab->alloc_from_partial = get_obj("alloc_from_partial");
1187 slab->alloc_slab = get_obj("alloc_slab");
1188 slab->alloc_refill = get_obj("alloc_refill");
1189 slab->free_slab = get_obj("free_slab");
1190 slab->cpuslab_flush = get_obj("cpuslab_flush");
1191 slab->deactivate_full = get_obj("deactivate_full");
1192 slab->deactivate_empty = get_obj("deactivate_empty");
1193 slab->deactivate_to_head = get_obj("deactivate_to_head");
1194 slab->deactivate_to_tail = get_obj("deactivate_to_tail");
1195 slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
1077 chdir(".."); 1196 chdir("..");
1078 if (slab->name[0] == ':') 1197 if (slab->name[0] == ':')
1079 alias_targets++; 1198 alias_targets++;
@@ -1124,7 +1243,9 @@ void output_slabs(void)
1124 1243
1125struct option opts[] = { 1244struct option opts[] = {
1126 { "aliases", 0, NULL, 'a' }, 1245 { "aliases", 0, NULL, 'a' },
1246 { "activity", 0, NULL, 'A' },
1127 { "debug", 2, NULL, 'd' }, 1247 { "debug", 2, NULL, 'd' },
1248 { "display-activity", 0, NULL, 'D' },
1128 { "empty", 0, NULL, 'e' }, 1249 { "empty", 0, NULL, 'e' },
1129 { "first-alias", 0, NULL, 'f' }, 1250 { "first-alias", 0, NULL, 'f' },
1130 { "help", 0, NULL, 'h' }, 1251 { "help", 0, NULL, 'h' },
@@ -1149,7 +1270,7 @@ int main(int argc, char *argv[])
1149 1270
1150 page_size = getpagesize(); 1271 page_size = getpagesize();
1151 1272
1152 while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS", 1273 while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS",
1153 opts, NULL)) != -1) 1274 opts, NULL)) != -1)
1154 switch (c) { 1275 switch (c) {
1155 case '1': 1276 case '1':
@@ -1158,11 +1279,17 @@ int main(int argc, char *argv[])
1158 case 'a': 1279 case 'a':
1159 show_alias = 1; 1280 show_alias = 1;
1160 break; 1281 break;
1282 case 'A':
1283 sort_active = 1;
1284 break;
1161 case 'd': 1285 case 'd':
1162 set_debug = 1; 1286 set_debug = 1;
1163 if (!debug_opt_scan(optarg)) 1287 if (!debug_opt_scan(optarg))
1164 fatal("Invalid debug option '%s'\n", optarg); 1288 fatal("Invalid debug option '%s'\n", optarg);
1165 break; 1289 break;
1290 case 'D':
1291 show_activity = 1;
1292 break;
1166 case 'e': 1293 case 'e':
1167 show_empty = 1; 1294 show_empty = 1;
1168 break; 1295 break;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c95482b6b6dd..9d0acedf5f3f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -52,6 +52,10 @@ config HAVE_LATENCYTOP_SUPPORT
52config SEMAPHORE_SLEEPERS 52config SEMAPHORE_SLEEPERS
53 def_bool y 53 def_bool y
54 54
55config FAST_CMPXCHG_LOCAL
56 bool
57 default y
58
55config MMU 59config MMU
56 def_bool y 60 def_bool y
57 61
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 34023c65d466..bfee0bd1d435 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -64,7 +64,10 @@ struct page {
64#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS 64#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
65 spinlock_t ptl; 65 spinlock_t ptl;
66#endif 66#endif
67 struct kmem_cache *slab; /* SLUB: Pointer to slab */ 67 struct {
68 struct kmem_cache *slab; /* SLUB: Pointer to slab */
69 void *end; /* SLUB: end marker */
70 };
68 struct page *first_page; /* Compound tail pages */ 71 struct page *first_page; /* Compound tail pages */
69 }; 72 };
70 union { 73 union {
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ddb1a706b144..5e6d3d634d5b 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,12 +11,35 @@
11#include <linux/workqueue.h> 11#include <linux/workqueue.h>
12#include <linux/kobject.h> 12#include <linux/kobject.h>
13 13
14enum stat_item {
15 ALLOC_FASTPATH, /* Allocation from cpu slab */
16 ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
17 FREE_FASTPATH, /* Free to cpu slab */
18 FREE_SLOWPATH, /* Freeing not to cpu slab */
19 FREE_FROZEN, /* Freeing to frozen slab */
20 FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */
21 FREE_REMOVE_PARTIAL, /* Freeing removes last object */
22 ALLOC_FROM_PARTIAL, /* Cpu slab acquired from partial list */
23 ALLOC_SLAB, /* Cpu slab acquired from page allocator */
24 ALLOC_REFILL, /* Refill cpu slab from slab freelist */
25 FREE_SLAB, /* Slab freed to the page allocator */
26 CPUSLAB_FLUSH, /* Abandoning of the cpu slab */
27 DEACTIVATE_FULL, /* Cpu slab was full when deactivated */
28 DEACTIVATE_EMPTY, /* Cpu slab was empty when deactivated */
29 DEACTIVATE_TO_HEAD, /* Cpu slab was moved to the head of partials */
30 DEACTIVATE_TO_TAIL, /* Cpu slab was moved to the tail of partials */
31 DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
32 NR_SLUB_STAT_ITEMS };
33
14struct kmem_cache_cpu { 34struct kmem_cache_cpu {
15 void **freelist; /* Pointer to first free per cpu object */ 35 void **freelist; /* Pointer to first free per cpu object */
16 struct page *page; /* The slab from which we are allocating */ 36 struct page *page; /* The slab from which we are allocating */
17 int node; /* The node of the page (or -1 for debug) */ 37 int node; /* The node of the page (or -1 for debug) */
18 unsigned int offset; /* Freepointer offset (in word units) */ 38 unsigned int offset; /* Freepointer offset (in word units) */
19 unsigned int objsize; /* Size of an object (from kmem_cache) */ 39 unsigned int objsize; /* Size of an object (from kmem_cache) */
40#ifdef CONFIG_SLUB_STATS
41 unsigned stat[NR_SLUB_STAT_ITEMS];
42#endif
20}; 43};
21 44
22struct kmem_cache_node { 45struct kmem_cache_node {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0d385be682db..4f4008fc73e4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -205,6 +205,19 @@ config SLUB_DEBUG_ON
205 off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying 205 off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
206 "slub_debug=-". 206 "slub_debug=-".
207 207
208config SLUB_STATS
209 default n
210 bool "Enable SLUB performance statistics"
211 depends on SLUB
212 help
213 SLUB statistics are useful to debug SLUB's allocation behavior in
214 order to find ways to optimize the allocator. This should never be
215 enabled for production use since keeping statistics slows down
216 the allocator by a few percentage points. The slabinfo command
217 supports the determination of the most active slabs to figure
218 out which slabs are relevant to a particular load.
219 Try running: slabinfo -DA
220
208config DEBUG_PREEMPT 221config DEBUG_PREEMPT
209 bool "Debug preemptible kernel" 222 bool "Debug preemptible kernel"
210 depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) 223 depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64)
diff --git a/mm/slub.c b/mm/slub.c
index 3f056677fa8f..e2989ae243b5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -149,6 +149,13 @@ static inline void ClearSlabDebug(struct page *page)
149/* Enable to test recovery from slab corruption on boot */ 149/* Enable to test recovery from slab corruption on boot */
150#undef SLUB_RESILIENCY_TEST 150#undef SLUB_RESILIENCY_TEST
151 151
152/*
153 * Currently fastpath is not supported if preemption is enabled.
154 */
155#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
156#define SLUB_FASTPATH
157#endif
158
152#if PAGE_SHIFT <= 12 159#if PAGE_SHIFT <= 12
153 160
154/* 161/*
@@ -243,6 +250,7 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
243static int sysfs_slab_add(struct kmem_cache *); 250static int sysfs_slab_add(struct kmem_cache *);
244static int sysfs_slab_alias(struct kmem_cache *, const char *); 251static int sysfs_slab_alias(struct kmem_cache *, const char *);
245static void sysfs_slab_remove(struct kmem_cache *); 252static void sysfs_slab_remove(struct kmem_cache *);
253
246#else 254#else
247static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } 255static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
248static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) 256static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
@@ -251,8 +259,16 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
251{ 259{
252 kfree(s); 260 kfree(s);
253} 261}
262
254#endif 263#endif
255 264
265static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
266{
267#ifdef CONFIG_SLUB_STATS
268 c->stat[si]++;
269#endif
270}
271
256/******************************************************************** 272/********************************************************************
257 * Core slab cache functions 273 * Core slab cache functions
258 *******************************************************************/ 274 *******************************************************************/
@@ -280,15 +296,32 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
280#endif 296#endif
281} 297}
282 298
299/*
300 * The end pointer in a slab is special. It points to the first object in the
301 * slab but has bit 0 set to mark it.
302 *
303 * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
304 * in the mapping set.
305 */
306static inline int is_end(void *addr)
307{
308 return (unsigned long)addr & PAGE_MAPPING_ANON;
309}
310
311void *slab_address(struct page *page)
312{
313 return page->end - PAGE_MAPPING_ANON;
314}
315
283static inline int check_valid_pointer(struct kmem_cache *s, 316static inline int check_valid_pointer(struct kmem_cache *s,
284 struct page *page, const void *object) 317 struct page *page, const void *object)
285{ 318{
286 void *base; 319 void *base;
287 320
288 if (!object) 321 if (object == page->end)
289 return 1; 322 return 1;
290 323
291 base = page_address(page); 324 base = slab_address(page);
292 if (object < base || object >= base + s->objects * s->size || 325 if (object < base || object >= base + s->objects * s->size ||
293 (object - base) % s->size) { 326 (object - base) % s->size) {
294 return 0; 327 return 0;
@@ -321,7 +354,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
321 354
322/* Scan freelist */ 355/* Scan freelist */
323#define for_each_free_object(__p, __s, __free) \ 356#define for_each_free_object(__p, __s, __free) \
324 for (__p = (__free); __p; __p = get_freepointer((__s), __p)) 357 for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
358 __p))
325 359
326/* Determine object index from a given position */ 360/* Determine object index from a given position */
327static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 361static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -473,7 +507,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
473static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) 507static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
474{ 508{
475 unsigned int off; /* Offset of last byte */ 509 unsigned int off; /* Offset of last byte */
476 u8 *addr = page_address(page); 510 u8 *addr = slab_address(page);
477 511
478 print_tracking(s, p); 512 print_tracking(s, p);
479 513
@@ -651,7 +685,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
651 if (!(s->flags & SLAB_POISON)) 685 if (!(s->flags & SLAB_POISON))
652 return 1; 686 return 1;
653 687
654 start = page_address(page); 688 start = slab_address(page);
655 end = start + (PAGE_SIZE << s->order); 689 end = start + (PAGE_SIZE << s->order);
656 length = s->objects * s->size; 690 length = s->objects * s->size;
657 remainder = end - (start + length); 691 remainder = end - (start + length);
@@ -685,9 +719,10 @@ static int check_object(struct kmem_cache *s, struct page *page,
685 endobject, red, s->inuse - s->objsize)) 719 endobject, red, s->inuse - s->objsize))
686 return 0; 720 return 0;
687 } else { 721 } else {
688 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) 722 if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
689 check_bytes_and_report(s, page, p, "Alignment padding", endobject, 723 check_bytes_and_report(s, page, p, "Alignment padding",
690 POISON_INUSE, s->inuse - s->objsize); 724 endobject, POISON_INUSE, s->inuse - s->objsize);
725 }
691 } 726 }
692 727
693 if (s->flags & SLAB_POISON) { 728 if (s->flags & SLAB_POISON) {
@@ -718,7 +753,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
718 * of the free objects in this slab. May cause 753 * of the free objects in this slab. May cause
719 * another error because the object count is now wrong. 754 * another error because the object count is now wrong.
720 */ 755 */
721 set_freepointer(s, p, NULL); 756 set_freepointer(s, p, page->end);
722 return 0; 757 return 0;
723 } 758 }
724 return 1; 759 return 1;
@@ -752,18 +787,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
752 void *fp = page->freelist; 787 void *fp = page->freelist;
753 void *object = NULL; 788 void *object = NULL;
754 789
755 while (fp && nr <= s->objects) { 790 while (fp != page->end && nr <= s->objects) {
756 if (fp == search) 791 if (fp == search)
757 return 1; 792 return 1;
758 if (!check_valid_pointer(s, page, fp)) { 793 if (!check_valid_pointer(s, page, fp)) {
759 if (object) { 794 if (object) {
760 object_err(s, page, object, 795 object_err(s, page, object,
761 "Freechain corrupt"); 796 "Freechain corrupt");
762 set_freepointer(s, object, NULL); 797 set_freepointer(s, object, page->end);
763 break; 798 break;
764 } else { 799 } else {
765 slab_err(s, page, "Freepointer corrupt"); 800 slab_err(s, page, "Freepointer corrupt");
766 page->freelist = NULL; 801 page->freelist = page->end;
767 page->inuse = s->objects; 802 page->inuse = s->objects;
768 slab_fix(s, "Freelist cleared"); 803 slab_fix(s, "Freelist cleared");
769 return 0; 804 return 0;
@@ -869,7 +904,7 @@ bad:
869 */ 904 */
870 slab_fix(s, "Marking all objects used"); 905 slab_fix(s, "Marking all objects used");
871 page->inuse = s->objects; 906 page->inuse = s->objects;
872 page->freelist = NULL; 907 page->freelist = page->end;
873 } 908 }
874 return 0; 909 return 0;
875} 910}
@@ -894,11 +929,10 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
894 return 0; 929 return 0;
895 930
896 if (unlikely(s != page->slab)) { 931 if (unlikely(s != page->slab)) {
897 if (!PageSlab(page)) 932 if (!PageSlab(page)) {
898 slab_err(s, page, "Attempt to free object(0x%p) " 933 slab_err(s, page, "Attempt to free object(0x%p) "
899 "outside of slab", object); 934 "outside of slab", object);
900 else 935 } else if (!page->slab) {
901 if (!page->slab) {
902 printk(KERN_ERR 936 printk(KERN_ERR
903 "SLUB <none>: no slab for object 0x%p.\n", 937 "SLUB <none>: no slab for object 0x%p.\n",
904 object); 938 object);
@@ -910,7 +944,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
910 } 944 }
911 945
912 /* Special debug activities for freeing objects */ 946 /* Special debug activities for freeing objects */
913 if (!SlabFrozen(page) && !page->freelist) 947 if (!SlabFrozen(page) && page->freelist == page->end)
914 remove_full(s, page); 948 remove_full(s, page);
915 if (s->flags & SLAB_STORE_USER) 949 if (s->flags & SLAB_STORE_USER)
916 set_track(s, object, TRACK_FREE, addr); 950 set_track(s, object, TRACK_FREE, addr);
@@ -1007,7 +1041,7 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
1007 */ 1041 */
1008 if (slub_debug && (!slub_debug_slabs || 1042 if (slub_debug && (!slub_debug_slabs ||
1009 strncmp(slub_debug_slabs, name, 1043 strncmp(slub_debug_slabs, name,
1010 strlen(slub_debug_slabs)) == 0)) 1044 strlen(slub_debug_slabs)) == 0))
1011 flags |= slub_debug; 1045 flags |= slub_debug;
1012 } 1046 }
1013 1047
@@ -1102,6 +1136,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1102 SetSlabDebug(page); 1136 SetSlabDebug(page);
1103 1137
1104 start = page_address(page); 1138 start = page_address(page);
1139 page->end = start + 1;
1105 1140
1106 if (unlikely(s->flags & SLAB_POISON)) 1141 if (unlikely(s->flags & SLAB_POISON))
1107 memset(start, POISON_INUSE, PAGE_SIZE << s->order); 1142 memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@@ -1113,7 +1148,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1113 last = p; 1148 last = p;
1114 } 1149 }
1115 setup_object(s, page, last); 1150 setup_object(s, page, last);
1116 set_freepointer(s, last, NULL); 1151 set_freepointer(s, last, page->end);
1117 1152
1118 page->freelist = start; 1153 page->freelist = start;
1119 page->inuse = 0; 1154 page->inuse = 0;
@@ -1129,7 +1164,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1129 void *p; 1164 void *p;
1130 1165
1131 slab_pad_check(s, page); 1166 slab_pad_check(s, page);
1132 for_each_object(p, s, page_address(page)) 1167 for_each_object(p, s, slab_address(page))
1133 check_object(s, page, p, 0); 1168 check_object(s, page, p, 0);
1134 ClearSlabDebug(page); 1169 ClearSlabDebug(page);
1135 } 1170 }
@@ -1139,6 +1174,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1139 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1174 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1140 -pages); 1175 -pages);
1141 1176
1177 page->mapping = NULL;
1142 __free_pages(page, s->order); 1178 __free_pages(page, s->order);
1143} 1179}
1144 1180
@@ -1183,7 +1219,7 @@ static __always_inline void slab_lock(struct page *page)
1183 1219
1184static __always_inline void slab_unlock(struct page *page) 1220static __always_inline void slab_unlock(struct page *page)
1185{ 1221{
1186 bit_spin_unlock(PG_locked, &page->flags); 1222 __bit_spin_unlock(PG_locked, &page->flags);
1187} 1223}
1188 1224
1189static __always_inline int slab_trylock(struct page *page) 1225static __always_inline int slab_trylock(struct page *page)
@@ -1294,8 +1330,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1294 get_cycles() % 1024 > s->remote_node_defrag_ratio) 1330 get_cycles() % 1024 > s->remote_node_defrag_ratio)
1295 return NULL; 1331 return NULL;
1296 1332
1297 zonelist = &NODE_DATA(slab_node(current->mempolicy)) 1333 zonelist = &NODE_DATA(
1298 ->node_zonelists[gfp_zone(flags)]; 1334 slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)];
1299 for (z = zonelist->zones; *z; z++) { 1335 for (z = zonelist->zones; *z; z++) {
1300 struct kmem_cache_node *n; 1336 struct kmem_cache_node *n;
1301 1337
@@ -1337,17 +1373,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1337static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) 1373static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1338{ 1374{
1339 struct kmem_cache_node *n = get_node(s, page_to_nid(page)); 1375 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1376 struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
1340 1377
1341 ClearSlabFrozen(page); 1378 ClearSlabFrozen(page);
1342 if (page->inuse) { 1379 if (page->inuse) {
1343 1380
1344 if (page->freelist) 1381 if (page->freelist != page->end) {
1345 add_partial(n, page, tail); 1382 add_partial(n, page, tail);
1346 else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) 1383 stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1347 add_full(n, page); 1384 } else {
1385 stat(c, DEACTIVATE_FULL);
1386 if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
1387 add_full(n, page);
1388 }
1348 slab_unlock(page); 1389 slab_unlock(page);
1349
1350 } else { 1390 } else {
1391 stat(c, DEACTIVATE_EMPTY);
1351 if (n->nr_partial < MIN_PARTIAL) { 1392 if (n->nr_partial < MIN_PARTIAL) {
1352 /* 1393 /*
1353 * Adding an empty slab to the partial slabs in order 1394 * Adding an empty slab to the partial slabs in order
@@ -1361,6 +1402,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1361 slab_unlock(page); 1402 slab_unlock(page);
1362 } else { 1403 } else {
1363 slab_unlock(page); 1404 slab_unlock(page);
1405 stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
1364 discard_slab(s, page); 1406 discard_slab(s, page);
1365 } 1407 }
1366 } 1408 }
@@ -1373,12 +1415,19 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1373{ 1415{
1374 struct page *page = c->page; 1416 struct page *page = c->page;
1375 int tail = 1; 1417 int tail = 1;
1418
1419 if (c->freelist)
1420 stat(c, DEACTIVATE_REMOTE_FREES);
1376 /* 1421 /*
1377 * Merge cpu freelist into freelist. Typically we get here 1422 * Merge cpu freelist into freelist. Typically we get here
1378 * because both freelists are empty. So this is unlikely 1423 * because both freelists are empty. So this is unlikely
1379 * to occur. 1424 * to occur.
1425 *
1426 * We need to use is_end() here because deactivate_slab() may
1427 * be called for a debug slab. Then c->freelist may contain
1428 * a dummy pointer.
1380 */ 1429 */
1381 while (unlikely(c->freelist)) { 1430 while (unlikely(!is_end(c->freelist))) {
1382 void **object; 1431 void **object;
1383 1432
1384 tail = 0; /* Hot objects. Put the slab first */ 1433 tail = 0; /* Hot objects. Put the slab first */
@@ -1398,6 +1447,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1398 1447
1399static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1448static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1400{ 1449{
1450 stat(c, CPUSLAB_FLUSH);
1401 slab_lock(c->page); 1451 slab_lock(c->page);
1402 deactivate_slab(s, c); 1452 deactivate_slab(s, c);
1403} 1453}
@@ -1469,16 +1519,21 @@ static void *__slab_alloc(struct kmem_cache *s,
1469{ 1519{
1470 void **object; 1520 void **object;
1471 struct page *new; 1521 struct page *new;
1522#ifdef SLUB_FASTPATH
1523 unsigned long flags;
1472 1524
1525 local_irq_save(flags);
1526#endif
1473 if (!c->page) 1527 if (!c->page)
1474 goto new_slab; 1528 goto new_slab;
1475 1529
1476 slab_lock(c->page); 1530 slab_lock(c->page);
1477 if (unlikely(!node_match(c, node))) 1531 if (unlikely(!node_match(c, node)))
1478 goto another_slab; 1532 goto another_slab;
1533 stat(c, ALLOC_REFILL);
1479load_freelist: 1534load_freelist:
1480 object = c->page->freelist; 1535 object = c->page->freelist;
1481 if (unlikely(!object)) 1536 if (unlikely(object == c->page->end))
1482 goto another_slab; 1537 goto another_slab;
1483 if (unlikely(SlabDebug(c->page))) 1538 if (unlikely(SlabDebug(c->page)))
1484 goto debug; 1539 goto debug;
@@ -1486,9 +1541,15 @@ load_freelist:
1486 object = c->page->freelist; 1541 object = c->page->freelist;
1487 c->freelist = object[c->offset]; 1542 c->freelist = object[c->offset];
1488 c->page->inuse = s->objects; 1543 c->page->inuse = s->objects;
1489 c->page->freelist = NULL; 1544 c->page->freelist = c->page->end;
1490 c->node = page_to_nid(c->page); 1545 c->node = page_to_nid(c->page);
1546unlock_out:
1491 slab_unlock(c->page); 1547 slab_unlock(c->page);
1548 stat(c, ALLOC_SLOWPATH);
1549out:
1550#ifdef SLUB_FASTPATH
1551 local_irq_restore(flags);
1552#endif
1492 return object; 1553 return object;
1493 1554
1494another_slab: 1555another_slab:
@@ -1498,6 +1559,7 @@ new_slab:
1498 new = get_partial(s, gfpflags, node); 1559 new = get_partial(s, gfpflags, node);
1499 if (new) { 1560 if (new) {
1500 c->page = new; 1561 c->page = new;
1562 stat(c, ALLOC_FROM_PARTIAL);
1501 goto load_freelist; 1563 goto load_freelist;
1502 } 1564 }
1503 1565
@@ -1511,6 +1573,7 @@ new_slab:
1511 1573
1512 if (new) { 1574 if (new) {
1513 c = get_cpu_slab(s, smp_processor_id()); 1575 c = get_cpu_slab(s, smp_processor_id());
1576 stat(c, ALLOC_SLAB);
1514 if (c->page) 1577 if (c->page)
1515 flush_slab(s, c); 1578 flush_slab(s, c);
1516 slab_lock(new); 1579 slab_lock(new);
@@ -1518,7 +1581,8 @@ new_slab:
1518 c->page = new; 1581 c->page = new;
1519 goto load_freelist; 1582 goto load_freelist;
1520 } 1583 }
1521 return NULL; 1584 object = NULL;
1585 goto out;
1522debug: 1586debug:
1523 object = c->page->freelist; 1587 object = c->page->freelist;
1524 if (!alloc_debug_processing(s, c->page, object, addr)) 1588 if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1527,8 +1591,7 @@ debug:
1527 c->page->inuse++; 1591 c->page->inuse++;
1528 c->page->freelist = object[c->offset]; 1592 c->page->freelist = object[c->offset];
1529 c->node = -1; 1593 c->node = -1;
1530 slab_unlock(c->page); 1594 goto unlock_out;
1531 return object;
1532} 1595}
1533 1596
1534/* 1597/*
@@ -1545,20 +1608,50 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1545 gfp_t gfpflags, int node, void *addr) 1608 gfp_t gfpflags, int node, void *addr)
1546{ 1609{
1547 void **object; 1610 void **object;
1548 unsigned long flags;
1549 struct kmem_cache_cpu *c; 1611 struct kmem_cache_cpu *c;
1550 1612
1613/*
1614 * The SLUB_FASTPATH path is provisional and is currently disabled if the
1615 * kernel is compiled with preemption or if the arch does not support
1616 * fast cmpxchg operations. There are a couple of coming changes that will
1617 * simplify matters and allow preemption. Ultimately we may end up making
1618 * SLUB_FASTPATH the default.
1619 *
1620 * 1. The introduction of the per cpu allocator will avoid array lookups
1621 * through get_cpu_slab(). A special register can be used instead.
1622 *
1623 * 2. The introduction of per cpu atomic operations (cpu_ops) means that
1624 * we can realize the logic here entirely with per cpu atomics. The
1625 * per cpu atomic ops will take care of the preemption issues.
1626 */
1627
1628#ifdef SLUB_FASTPATH
1629 c = get_cpu_slab(s, raw_smp_processor_id());
1630 do {
1631 object = c->freelist;
1632 if (unlikely(is_end(object) || !node_match(c, node))) {
1633 object = __slab_alloc(s, gfpflags, node, addr, c);
1634 break;
1635 }
1636 stat(c, ALLOC_FASTPATH);
1637 } while (cmpxchg_local(&c->freelist, object, object[c->offset])
1638 != object);
1639#else
1640 unsigned long flags;
1641
1551 local_irq_save(flags); 1642 local_irq_save(flags);
1552 c = get_cpu_slab(s, smp_processor_id()); 1643 c = get_cpu_slab(s, smp_processor_id());
1553 if (unlikely(!c->freelist || !node_match(c, node))) 1644 if (unlikely(is_end(c->freelist) || !node_match(c, node)))
1554 1645
1555 object = __slab_alloc(s, gfpflags, node, addr, c); 1646 object = __slab_alloc(s, gfpflags, node, addr, c);
1556 1647
1557 else { 1648 else {
1558 object = c->freelist; 1649 object = c->freelist;
1559 c->freelist = object[c->offset]; 1650 c->freelist = object[c->offset];
1651 stat(c, ALLOC_FASTPATH);
1560 } 1652 }
1561 local_irq_restore(flags); 1653 local_irq_restore(flags);
1654#endif
1562 1655
1563 if (unlikely((gfpflags & __GFP_ZERO) && object)) 1656 if (unlikely((gfpflags & __GFP_ZERO) && object))
1564 memset(object, 0, c->objsize); 1657 memset(object, 0, c->objsize);
@@ -1593,7 +1686,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
1593{ 1686{
1594 void *prior; 1687 void *prior;
1595 void **object = (void *)x; 1688 void **object = (void *)x;
1689 struct kmem_cache_cpu *c;
1690
1691#ifdef SLUB_FASTPATH
1692 unsigned long flags;
1596 1693
1694 local_irq_save(flags);
1695#endif
1696 c = get_cpu_slab(s, raw_smp_processor_id());
1697 stat(c, FREE_SLOWPATH);
1597 slab_lock(page); 1698 slab_lock(page);
1598 1699
1599 if (unlikely(SlabDebug(page))) 1700 if (unlikely(SlabDebug(page)))
@@ -1603,8 +1704,10 @@ checks_ok:
1603 page->freelist = object; 1704 page->freelist = object;
1604 page->inuse--; 1705 page->inuse--;
1605 1706
1606 if (unlikely(SlabFrozen(page))) 1707 if (unlikely(SlabFrozen(page))) {
1708 stat(c, FREE_FROZEN);
1607 goto out_unlock; 1709 goto out_unlock;
1710 }
1608 1711
1609 if (unlikely(!page->inuse)) 1712 if (unlikely(!page->inuse))
1610 goto slab_empty; 1713 goto slab_empty;
@@ -1614,21 +1717,31 @@ checks_ok:
1614 * was not on the partial list before 1717 * was not on the partial list before
1615 * then add it. 1718 * then add it.
1616 */ 1719 */
1617 if (unlikely(!prior)) 1720 if (unlikely(prior == page->end)) {
1618 add_partial(get_node(s, page_to_nid(page)), page, 1); 1721 add_partial(get_node(s, page_to_nid(page)), page, 1);
1722 stat(c, FREE_ADD_PARTIAL);
1723 }
1619 1724
1620out_unlock: 1725out_unlock:
1621 slab_unlock(page); 1726 slab_unlock(page);
1727#ifdef SLUB_FASTPATH
1728 local_irq_restore(flags);
1729#endif
1622 return; 1730 return;
1623 1731
1624slab_empty: 1732slab_empty:
1625 if (prior) 1733 if (prior != page->end) {
1626 /* 1734 /*
1627 * Slab still on the partial list. 1735 * Slab still on the partial list.
1628 */ 1736 */
1629 remove_partial(s, page); 1737 remove_partial(s, page);
1630 1738 stat(c, FREE_REMOVE_PARTIAL);
1739 }
1631 slab_unlock(page); 1740 slab_unlock(page);
1741 stat(c, FREE_SLAB);
1742#ifdef SLUB_FASTPATH
1743 local_irq_restore(flags);
1744#endif
1632 discard_slab(s, page); 1745 discard_slab(s, page);
1633 return; 1746 return;
1634 1747
@@ -1653,19 +1766,49 @@ static __always_inline void slab_free(struct kmem_cache *s,
1653 struct page *page, void *x, void *addr) 1766 struct page *page, void *x, void *addr)
1654{ 1767{
1655 void **object = (void *)x; 1768 void **object = (void *)x;
1656 unsigned long flags;
1657 struct kmem_cache_cpu *c; 1769 struct kmem_cache_cpu *c;
1658 1770
1771#ifdef SLUB_FASTPATH
1772 void **freelist;
1773
1774 c = get_cpu_slab(s, raw_smp_processor_id());
1775 debug_check_no_locks_freed(object, s->objsize);
1776 do {
1777 freelist = c->freelist;
1778 barrier();
1779 /*
1780 * If the compiler would reorder the retrieval of c->page to
1781 * come before c->freelist then an interrupt could
1782 * change the cpu slab before we retrieve c->freelist. We
1783 * could be matching on a page no longer active and put the
1784 * object onto the freelist of the wrong slab.
1785 *
1786 * On the other hand: If we already have the freelist pointer
1787 * then any change of cpu_slab will cause the cmpxchg to fail
1788 * since the freelist pointers are unique per slab.
1789 */
1790 if (unlikely(page != c->page || c->node < 0)) {
1791 __slab_free(s, page, x, addr, c->offset);
1792 break;
1793 }
1794 object[c->offset] = freelist;
1795 stat(c, FREE_FASTPATH);
1796 } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
1797#else
1798 unsigned long flags;
1799
1659 local_irq_save(flags); 1800 local_irq_save(flags);
1660 debug_check_no_locks_freed(object, s->objsize); 1801 debug_check_no_locks_freed(object, s->objsize);
1661 c = get_cpu_slab(s, smp_processor_id()); 1802 c = get_cpu_slab(s, smp_processor_id());
1662 if (likely(page == c->page && c->node >= 0)) { 1803 if (likely(page == c->page && c->node >= 0)) {
1663 object[c->offset] = c->freelist; 1804 object[c->offset] = c->freelist;
1664 c->freelist = object; 1805 c->freelist = object;
1806 stat(c, FREE_FASTPATH);
1665 } else 1807 } else
1666 __slab_free(s, page, x, addr, c->offset); 1808 __slab_free(s, page, x, addr, c->offset);
1667 1809
1668 local_irq_restore(flags); 1810 local_irq_restore(flags);
1811#endif
1669} 1812}
1670 1813
1671void kmem_cache_free(struct kmem_cache *s, void *x) 1814void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -1842,7 +1985,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
1842 struct kmem_cache_cpu *c) 1985 struct kmem_cache_cpu *c)
1843{ 1986{
1844 c->page = NULL; 1987 c->page = NULL;
1845 c->freelist = NULL; 1988 c->freelist = (void *)PAGE_MAPPING_ANON;
1846 c->node = 0; 1989 c->node = 0;
1847 c->offset = s->offset / sizeof(void *); 1990 c->offset = s->offset / sizeof(void *);
1848 c->objsize = s->objsize; 1991 c->objsize = s->objsize;
@@ -2446,7 +2589,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
2446 goto unlock_out; 2589 goto unlock_out;
2447 2590
2448 realsize = kmalloc_caches[index].objsize; 2591 realsize = kmalloc_caches[index].objsize;
2449 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize), 2592 text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
2593 (unsigned int)realsize);
2450 s = kmalloc(kmem_size, flags & ~SLUB_DMA); 2594 s = kmalloc(kmem_size, flags & ~SLUB_DMA);
2451 2595
2452 if (!s || !text || !kmem_cache_open(s, flags, text, 2596 if (!s || !text || !kmem_cache_open(s, flags, text,
@@ -2601,6 +2745,7 @@ EXPORT_SYMBOL(ksize);
2601void kfree(const void *x) 2745void kfree(const void *x)
2602{ 2746{
2603 struct page *page; 2747 struct page *page;
2748 void *object = (void *)x;
2604 2749
2605 if (unlikely(ZERO_OR_NULL_PTR(x))) 2750 if (unlikely(ZERO_OR_NULL_PTR(x)))
2606 return; 2751 return;
@@ -2610,7 +2755,7 @@ void kfree(const void *x)
2610 put_page(page); 2755 put_page(page);
2611 return; 2756 return;
2612 } 2757 }
2613 slab_free(page->slab, page, (void *)x, __builtin_return_address(0)); 2758 slab_free(page->slab, page, object, __builtin_return_address(0));
2614} 2759}
2615EXPORT_SYMBOL(kfree); 2760EXPORT_SYMBOL(kfree);
2616 2761
@@ -2896,7 +3041,8 @@ void __init kmem_cache_init(void)
2896#endif 3041#endif
2897 3042
2898 3043
2899 printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 3044 printk(KERN_INFO
3045 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
2900 " CPUs=%d, Nodes=%d\n", 3046 " CPUs=%d, Nodes=%d\n",
2901 caches, cache_line_size(), 3047 caches, cache_line_size(),
2902 slub_min_order, slub_max_order, slub_min_objects, 3048 slub_min_order, slub_max_order, slub_min_objects,
@@ -3063,7 +3209,7 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
3063} 3209}
3064 3210
3065static struct notifier_block __cpuinitdata slab_notifier = { 3211static struct notifier_block __cpuinitdata slab_notifier = {
3066 &slab_cpuup_callback, NULL, 0 3212 .notifier_call = slab_cpuup_callback
3067}; 3213};
3068 3214
3069#endif 3215#endif
@@ -3104,7 +3250,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
3104 unsigned long *map) 3250 unsigned long *map)
3105{ 3251{
3106 void *p; 3252 void *p;
3107 void *addr = page_address(page); 3253 void *addr = slab_address(page);
3108 3254
3109 if (!check_slab(s, page) || 3255 if (!check_slab(s, page) ||
3110 !on_freelist(s, page, NULL)) 3256 !on_freelist(s, page, NULL))
@@ -3221,8 +3367,9 @@ static void resiliency_test(void)
3221 p = kzalloc(32, GFP_KERNEL); 3367 p = kzalloc(32, GFP_KERNEL);
3222 p[32 + sizeof(void *)] = 0x34; 3368 p[32 + sizeof(void *)] = 0x34;
3223 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" 3369 printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
3224 " 0x34 -> -0x%p\n", p); 3370 " 0x34 -> -0x%p\n", p);
3225 printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); 3371 printk(KERN_ERR
3372 "If allocated object is overwritten then not detectable\n\n");
3226 3373
3227 validate_slab_cache(kmalloc_caches + 5); 3374 validate_slab_cache(kmalloc_caches + 5);
3228 p = kzalloc(64, GFP_KERNEL); 3375 p = kzalloc(64, GFP_KERNEL);
@@ -3230,7 +3377,8 @@ static void resiliency_test(void)
3230 *p = 0x56; 3377 *p = 0x56;
3231 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", 3378 printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
3232 p); 3379 p);
3233 printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); 3380 printk(KERN_ERR
3381 "If allocated object is overwritten then not detectable\n\n");
3234 validate_slab_cache(kmalloc_caches + 6); 3382 validate_slab_cache(kmalloc_caches + 6);
3235 3383
3236 printk(KERN_ERR "\nB. Corruption after free\n"); 3384 printk(KERN_ERR "\nB. Corruption after free\n");
@@ -3243,7 +3391,8 @@ static void resiliency_test(void)
3243 p = kzalloc(256, GFP_KERNEL); 3391 p = kzalloc(256, GFP_KERNEL);
3244 kfree(p); 3392 kfree(p);
3245 p[50] = 0x9a; 3393 p[50] = 0x9a;
3246 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); 3394 printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
3395 p);
3247 validate_slab_cache(kmalloc_caches + 8); 3396 validate_slab_cache(kmalloc_caches + 8);
3248 3397
3249 p = kzalloc(512, GFP_KERNEL); 3398 p = kzalloc(512, GFP_KERNEL);
@@ -3384,7 +3533,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
3384static void process_slab(struct loc_track *t, struct kmem_cache *s, 3533static void process_slab(struct loc_track *t, struct kmem_cache *s,
3385 struct page *page, enum track_item alloc) 3534 struct page *page, enum track_item alloc)
3386{ 3535{
3387 void *addr = page_address(page); 3536 void *addr = slab_address(page);
3388 DECLARE_BITMAP(map, s->objects); 3537 DECLARE_BITMAP(map, s->objects);
3389 void *p; 3538 void *p;
3390 3539
@@ -3872,6 +4021,62 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
3872SLAB_ATTR(remote_node_defrag_ratio); 4021SLAB_ATTR(remote_node_defrag_ratio);
3873#endif 4022#endif
3874 4023
4024#ifdef CONFIG_SLUB_STATS
4025
4026static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4027{
4028 unsigned long sum = 0;
4029 int cpu;
4030 int len;
4031 int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
4032
4033 if (!data)
4034 return -ENOMEM;
4035
4036 for_each_online_cpu(cpu) {
4037 unsigned x = get_cpu_slab(s, cpu)->stat[si];
4038
4039 data[cpu] = x;
4040 sum += x;
4041 }
4042
4043 len = sprintf(buf, "%lu", sum);
4044
4045 for_each_online_cpu(cpu) {
4046 if (data[cpu] && len < PAGE_SIZE - 20)
4047 len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
4048 }
4049 kfree(data);
4050 return len + sprintf(buf + len, "\n");
4051}
4052
4053#define STAT_ATTR(si, text) \
4054static ssize_t text##_show(struct kmem_cache *s, char *buf) \
4055{ \
4056 return show_stat(s, buf, si); \
4057} \
4058SLAB_ATTR_RO(text); \
4059
4060STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
4061STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
4062STAT_ATTR(FREE_FASTPATH, free_fastpath);
4063STAT_ATTR(FREE_SLOWPATH, free_slowpath);
4064STAT_ATTR(FREE_FROZEN, free_frozen);
4065STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
4066STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
4067STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
4068STAT_ATTR(ALLOC_SLAB, alloc_slab);
4069STAT_ATTR(ALLOC_REFILL, alloc_refill);
4070STAT_ATTR(FREE_SLAB, free_slab);
4071STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
4072STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
4073STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
4074STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
4075STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
4076STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
4077
4078#endif
4079
3875static struct attribute *slab_attrs[] = { 4080static struct attribute *slab_attrs[] = {
3876 &slab_size_attr.attr, 4081 &slab_size_attr.attr,
3877 &object_size_attr.attr, 4082 &object_size_attr.attr,
@@ -3902,6 +4107,25 @@ static struct attribute *slab_attrs[] = {
3902#ifdef CONFIG_NUMA 4107#ifdef CONFIG_NUMA
3903 &remote_node_defrag_ratio_attr.attr, 4108 &remote_node_defrag_ratio_attr.attr,
3904#endif 4109#endif
4110#ifdef CONFIG_SLUB_STATS
4111 &alloc_fastpath_attr.attr,
4112 &alloc_slowpath_attr.attr,
4113 &free_fastpath_attr.attr,
4114 &free_slowpath_attr.attr,
4115 &free_frozen_attr.attr,
4116 &free_add_partial_attr.attr,
4117 &free_remove_partial_attr.attr,
4118 &alloc_from_partial_attr.attr,
4119 &alloc_slab_attr.attr,
4120 &alloc_refill_attr.attr,
4121 &free_slab_attr.attr,
4122 &cpuslab_flush_attr.attr,
4123 &deactivate_full_attr.attr,
4124 &deactivate_empty_attr.attr,
4125 &deactivate_to_head_attr.attr,
4126 &deactivate_to_tail_attr.attr,
4127 &deactivate_remote_frees_attr.attr,
4128#endif
3905 NULL 4129 NULL
3906}; 4130};
3907 4131