diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 21:22:29 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 21:22:29 -0500 |
commit | c00f08d705e149fbfaf7a252b4d4fbb7affdcc96 (patch) | |
tree | 8c916856376d0d400ddda239d5be386f9b9516d7 | |
parent | c8b6de16d9434405e5832b8772e4f986ddd5118e (diff) | |
parent | 3adbefee6fd58a061b2bf1df4f3769701860fc62 (diff) |
Merge branch 'slub-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/christoph/vm
* 'slub-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/christoph/vm:
SLUB: fix checkpatch warnings
Use non atomic unlock
SLUB: Support for performance statistics
SLUB: Alternate fast paths using cmpxchg_local
SLUB: Use unique end pointer for each slab page.
SLUB: Deal with annoying gcc warning on kfree()
-rw-r--r-- | Documentation/vm/slabinfo.c | 149 | ||||
-rw-r--r-- | arch/x86/Kconfig | 4 | ||||
-rw-r--r-- | include/linux/mm_types.h | 5 | ||||
-rw-r--r-- | include/linux/slub_def.h | 23 | ||||
-rw-r--r-- | lib/Kconfig.debug | 13 | ||||
-rw-r--r-- | mm/slub.c | 326 |
6 files changed, 457 insertions, 63 deletions
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c index 488c1f31b992..7123fee708ca 100644 --- a/Documentation/vm/slabinfo.c +++ b/Documentation/vm/slabinfo.c | |||
@@ -32,6 +32,13 @@ struct slabinfo { | |||
32 | int sanity_checks, slab_size, store_user, trace; | 32 | int sanity_checks, slab_size, store_user, trace; |
33 | int order, poison, reclaim_account, red_zone; | 33 | int order, poison, reclaim_account, red_zone; |
34 | unsigned long partial, objects, slabs; | 34 | unsigned long partial, objects, slabs; |
35 | unsigned long alloc_fastpath, alloc_slowpath; | ||
36 | unsigned long free_fastpath, free_slowpath; | ||
37 | unsigned long free_frozen, free_add_partial, free_remove_partial; | ||
38 | unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill; | ||
39 | unsigned long cpuslab_flush, deactivate_full, deactivate_empty; | ||
40 | unsigned long deactivate_to_head, deactivate_to_tail; | ||
41 | unsigned long deactivate_remote_frees; | ||
35 | int numa[MAX_NODES]; | 42 | int numa[MAX_NODES]; |
36 | int numa_partial[MAX_NODES]; | 43 | int numa_partial[MAX_NODES]; |
37 | } slabinfo[MAX_SLABS]; | 44 | } slabinfo[MAX_SLABS]; |
@@ -64,8 +71,10 @@ int show_inverted = 0; | |||
64 | int show_single_ref = 0; | 71 | int show_single_ref = 0; |
65 | int show_totals = 0; | 72 | int show_totals = 0; |
66 | int sort_size = 0; | 73 | int sort_size = 0; |
74 | int sort_active = 0; | ||
67 | int set_debug = 0; | 75 | int set_debug = 0; |
68 | int show_ops = 0; | 76 | int show_ops = 0; |
77 | int show_activity = 0; | ||
69 | 78 | ||
70 | /* Debug options */ | 79 | /* Debug options */ |
71 | int sanity = 0; | 80 | int sanity = 0; |
@@ -93,8 +102,10 @@ void usage(void) | |||
93 | printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" | 102 | printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n" |
94 | "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" | 103 | "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" |
95 | "-a|--aliases Show aliases\n" | 104 | "-a|--aliases Show aliases\n" |
105 | "-A|--activity Most active slabs first\n" | ||
96 | "-d<options>|--debug=<options> Set/Clear Debug options\n" | 106 | "-d<options>|--debug=<options> Set/Clear Debug options\n" |
97 | "-e|--empty Show empty slabs\n" | 107 | "-D|--display-active Switch line format to activity\n" |
108 | "-e|--empty Show empty slabs\n" | ||
98 | "-f|--first-alias Show first alias\n" | 109 | "-f|--first-alias Show first alias\n" |
99 | "-h|--help Show usage information\n" | 110 | "-h|--help Show usage information\n" |
100 | "-i|--inverted Inverted list\n" | 111 | "-i|--inverted Inverted list\n" |
@@ -281,8 +292,11 @@ int line = 0; | |||
281 | 292 | ||
282 | void first_line(void) | 293 | void first_line(void) |
283 | { | 294 | { |
284 | printf("Name Objects Objsize Space " | 295 | if (show_activity) |
285 | "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); | 296 | printf("Name Objects Alloc Free %%Fast\n"); |
297 | else | ||
298 | printf("Name Objects Objsize Space " | ||
299 | "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); | ||
286 | } | 300 | } |
287 | 301 | ||
288 | /* | 302 | /* |
@@ -309,6 +323,12 @@ unsigned long slab_size(struct slabinfo *s) | |||
309 | return s->slabs * (page_size << s->order); | 323 | return s->slabs * (page_size << s->order); |
310 | } | 324 | } |
311 | 325 | ||
326 | unsigned long slab_activity(struct slabinfo *s) | ||
327 | { | ||
328 | return s->alloc_fastpath + s->free_fastpath + | ||
329 | s->alloc_slowpath + s->free_slowpath; | ||
330 | } | ||
331 | |||
312 | void slab_numa(struct slabinfo *s, int mode) | 332 | void slab_numa(struct slabinfo *s, int mode) |
313 | { | 333 | { |
314 | int node; | 334 | int node; |
@@ -392,6 +412,71 @@ const char *onoff(int x) | |||
392 | return "Off"; | 412 | return "Off"; |
393 | } | 413 | } |
394 | 414 | ||
415 | void slab_stats(struct slabinfo *s) | ||
416 | { | ||
417 | unsigned long total_alloc; | ||
418 | unsigned long total_free; | ||
419 | unsigned long total; | ||
420 | |||
421 | if (!s->alloc_slab) | ||
422 | return; | ||
423 | |||
424 | total_alloc = s->alloc_fastpath + s->alloc_slowpath; | ||
425 | total_free = s->free_fastpath + s->free_slowpath; | ||
426 | |||
427 | if (!total_alloc) | ||
428 | return; | ||
429 | |||
430 | printf("\n"); | ||
431 | printf("Slab Perf Counter Alloc Free %%Al %%Fr\n"); | ||
432 | printf("--------------------------------------------------\n"); | ||
433 | printf("Fastpath %8lu %8lu %3lu %3lu\n", | ||
434 | s->alloc_fastpath, s->free_fastpath, | ||
435 | s->alloc_fastpath * 100 / total_alloc, | ||
436 | s->free_fastpath * 100 / total_free); | ||
437 | printf("Slowpath %8lu %8lu %3lu %3lu\n", | ||
438 | total_alloc - s->alloc_fastpath, s->free_slowpath, | ||
439 | (total_alloc - s->alloc_fastpath) * 100 / total_alloc, | ||
440 | s->free_slowpath * 100 / total_free); | ||
441 | printf("Page Alloc %8lu %8lu %3lu %3lu\n", | ||
442 | s->alloc_slab, s->free_slab, | ||
443 | s->alloc_slab * 100 / total_alloc, | ||
444 | s->free_slab * 100 / total_free); | ||
445 | printf("Add partial %8lu %8lu %3lu %3lu\n", | ||
446 | s->deactivate_to_head + s->deactivate_to_tail, | ||
447 | s->free_add_partial, | ||
448 | (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc, | ||
449 | s->free_add_partial * 100 / total_free); | ||
450 | printf("Remove partial %8lu %8lu %3lu %3lu\n", | ||
451 | s->alloc_from_partial, s->free_remove_partial, | ||
452 | s->alloc_from_partial * 100 / total_alloc, | ||
453 | s->free_remove_partial * 100 / total_free); | ||
454 | |||
455 | printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n", | ||
456 | s->deactivate_remote_frees, s->free_frozen, | ||
457 | s->deactivate_remote_frees * 100 / total_alloc, | ||
458 | s->free_frozen * 100 / total_free); | ||
459 | |||
460 | printf("Total %8lu %8lu\n\n", total_alloc, total_free); | ||
461 | |||
462 | if (s->cpuslab_flush) | ||
463 | printf("Flushes %8lu\n", s->cpuslab_flush); | ||
464 | |||
465 | if (s->alloc_refill) | ||
466 | printf("Refill %8lu\n", s->alloc_refill); | ||
467 | |||
468 | total = s->deactivate_full + s->deactivate_empty + | ||
469 | s->deactivate_to_head + s->deactivate_to_tail; | ||
470 | |||
471 | if (total) | ||
472 | printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) " | ||
473 | "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n", | ||
474 | s->deactivate_full, (s->deactivate_full * 100) / total, | ||
475 | s->deactivate_empty, (s->deactivate_empty * 100) / total, | ||
476 | s->deactivate_to_head, (s->deactivate_to_head * 100) / total, | ||
477 | s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); | ||
478 | } | ||
479 | |||
395 | void report(struct slabinfo *s) | 480 | void report(struct slabinfo *s) |
396 | { | 481 | { |
397 | if (strcmp(s->name, "*") == 0) | 482 | if (strcmp(s->name, "*") == 0) |
@@ -430,6 +515,7 @@ void report(struct slabinfo *s) | |||
430 | ops(s); | 515 | ops(s); |
431 | show_tracking(s); | 516 | show_tracking(s); |
432 | slab_numa(s, 1); | 517 | slab_numa(s, 1); |
518 | slab_stats(s); | ||
433 | } | 519 | } |
434 | 520 | ||
435 | void slabcache(struct slabinfo *s) | 521 | void slabcache(struct slabinfo *s) |
@@ -479,13 +565,27 @@ void slabcache(struct slabinfo *s) | |||
479 | *p++ = 'T'; | 565 | *p++ = 'T'; |
480 | 566 | ||
481 | *p = 0; | 567 | *p = 0; |
482 | printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", | 568 | if (show_activity) { |
483 | s->name, s->objects, s->object_size, size_str, dist_str, | 569 | unsigned long total_alloc; |
484 | s->objs_per_slab, s->order, | 570 | unsigned long total_free; |
485 | s->slabs ? (s->partial * 100) / s->slabs : 100, | 571 | |
486 | s->slabs ? (s->objects * s->object_size * 100) / | 572 | total_alloc = s->alloc_fastpath + s->alloc_slowpath; |
487 | (s->slabs * (page_size << s->order)) : 100, | 573 | total_free = s->free_fastpath + s->free_slowpath; |
488 | flags); | 574 | |
575 | printf("%-21s %8ld %8ld %8ld %3ld %3ld \n", | ||
576 | s->name, s->objects, | ||
577 | total_alloc, total_free, | ||
578 | total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, | ||
579 | total_free ? (s->free_fastpath * 100 / total_free) : 0); | ||
580 | } | ||
581 | else | ||
582 | printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", | ||
583 | s->name, s->objects, s->object_size, size_str, dist_str, | ||
584 | s->objs_per_slab, s->order, | ||
585 | s->slabs ? (s->partial * 100) / s->slabs : 100, | ||
586 | s->slabs ? (s->objects * s->object_size * 100) / | ||
587 | (s->slabs * (page_size << s->order)) : 100, | ||
588 | flags); | ||
489 | } | 589 | } |
490 | 590 | ||
491 | /* | 591 | /* |
@@ -892,6 +992,8 @@ void sort_slabs(void) | |||
892 | 992 | ||
893 | if (sort_size) | 993 | if (sort_size) |
894 | result = slab_size(s1) < slab_size(s2); | 994 | result = slab_size(s1) < slab_size(s2); |
995 | else if (sort_active) | ||
996 | result = slab_activity(s1) < slab_activity(s2); | ||
895 | else | 997 | else |
896 | result = strcasecmp(s1->name, s2->name); | 998 | result = strcasecmp(s1->name, s2->name); |
897 | 999 | ||
@@ -1074,6 +1176,23 @@ void read_slab_dir(void) | |||
1074 | free(t); | 1176 | free(t); |
1075 | slab->store_user = get_obj("store_user"); | 1177 | slab->store_user = get_obj("store_user"); |
1076 | slab->trace = get_obj("trace"); | 1178 | slab->trace = get_obj("trace"); |
1179 | slab->alloc_fastpath = get_obj("alloc_fastpath"); | ||
1180 | slab->alloc_slowpath = get_obj("alloc_slowpath"); | ||
1181 | slab->free_fastpath = get_obj("free_fastpath"); | ||
1182 | slab->free_slowpath = get_obj("free_slowpath"); | ||
1183 | slab->free_frozen= get_obj("free_frozen"); | ||
1184 | slab->free_add_partial = get_obj("free_add_partial"); | ||
1185 | slab->free_remove_partial = get_obj("free_remove_partial"); | ||
1186 | slab->alloc_from_partial = get_obj("alloc_from_partial"); | ||
1187 | slab->alloc_slab = get_obj("alloc_slab"); | ||
1188 | slab->alloc_refill = get_obj("alloc_refill"); | ||
1189 | slab->free_slab = get_obj("free_slab"); | ||
1190 | slab->cpuslab_flush = get_obj("cpuslab_flush"); | ||
1191 | slab->deactivate_full = get_obj("deactivate_full"); | ||
1192 | slab->deactivate_empty = get_obj("deactivate_empty"); | ||
1193 | slab->deactivate_to_head = get_obj("deactivate_to_head"); | ||
1194 | slab->deactivate_to_tail = get_obj("deactivate_to_tail"); | ||
1195 | slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); | ||
1077 | chdir(".."); | 1196 | chdir(".."); |
1078 | if (slab->name[0] == ':') | 1197 | if (slab->name[0] == ':') |
1079 | alias_targets++; | 1198 | alias_targets++; |
@@ -1124,7 +1243,9 @@ void output_slabs(void) | |||
1124 | 1243 | ||
1125 | struct option opts[] = { | 1244 | struct option opts[] = { |
1126 | { "aliases", 0, NULL, 'a' }, | 1245 | { "aliases", 0, NULL, 'a' }, |
1246 | { "activity", 0, NULL, 'A' }, | ||
1127 | { "debug", 2, NULL, 'd' }, | 1247 | { "debug", 2, NULL, 'd' }, |
1248 | { "display-activity", 0, NULL, 'D' }, | ||
1128 | { "empty", 0, NULL, 'e' }, | 1249 | { "empty", 0, NULL, 'e' }, |
1129 | { "first-alias", 0, NULL, 'f' }, | 1250 | { "first-alias", 0, NULL, 'f' }, |
1130 | { "help", 0, NULL, 'h' }, | 1251 | { "help", 0, NULL, 'h' }, |
@@ -1149,7 +1270,7 @@ int main(int argc, char *argv[]) | |||
1149 | 1270 | ||
1150 | page_size = getpagesize(); | 1271 | page_size = getpagesize(); |
1151 | 1272 | ||
1152 | while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS", | 1273 | while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS", |
1153 | opts, NULL)) != -1) | 1274 | opts, NULL)) != -1) |
1154 | switch (c) { | 1275 | switch (c) { |
1155 | case '1': | 1276 | case '1': |
@@ -1158,11 +1279,17 @@ int main(int argc, char *argv[]) | |||
1158 | case 'a': | 1279 | case 'a': |
1159 | show_alias = 1; | 1280 | show_alias = 1; |
1160 | break; | 1281 | break; |
1282 | case 'A': | ||
1283 | sort_active = 1; | ||
1284 | break; | ||
1161 | case 'd': | 1285 | case 'd': |
1162 | set_debug = 1; | 1286 | set_debug = 1; |
1163 | if (!debug_opt_scan(optarg)) | 1287 | if (!debug_opt_scan(optarg)) |
1164 | fatal("Invalid debug option '%s'\n", optarg); | 1288 | fatal("Invalid debug option '%s'\n", optarg); |
1165 | break; | 1289 | break; |
1290 | case 'D': | ||
1291 | show_activity = 1; | ||
1292 | break; | ||
1166 | case 'e': | 1293 | case 'e': |
1167 | show_empty = 1; | 1294 | show_empty = 1; |
1168 | break; | 1295 | break; |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c95482b6b6dd..9d0acedf5f3f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -52,6 +52,10 @@ config HAVE_LATENCYTOP_SUPPORT | |||
52 | config SEMAPHORE_SLEEPERS | 52 | config SEMAPHORE_SLEEPERS |
53 | def_bool y | 53 | def_bool y |
54 | 54 | ||
55 | config FAST_CMPXCHG_LOCAL | ||
56 | bool | ||
57 | default y | ||
58 | |||
55 | config MMU | 59 | config MMU |
56 | def_bool y | 60 | def_bool y |
57 | 61 | ||
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 34023c65d466..bfee0bd1d435 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -64,7 +64,10 @@ struct page { | |||
64 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS | 64 | #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS |
65 | spinlock_t ptl; | 65 | spinlock_t ptl; |
66 | #endif | 66 | #endif |
67 | struct kmem_cache *slab; /* SLUB: Pointer to slab */ | 67 | struct { |
68 | struct kmem_cache *slab; /* SLUB: Pointer to slab */ | ||
69 | void *end; /* SLUB: end marker */ | ||
70 | }; | ||
68 | struct page *first_page; /* Compound tail pages */ | 71 | struct page *first_page; /* Compound tail pages */ |
69 | }; | 72 | }; |
70 | union { | 73 | union { |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index ddb1a706b144..5e6d3d634d5b 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h | |||
@@ -11,12 +11,35 @@ | |||
11 | #include <linux/workqueue.h> | 11 | #include <linux/workqueue.h> |
12 | #include <linux/kobject.h> | 12 | #include <linux/kobject.h> |
13 | 13 | ||
/*
 * Identifiers of the SLUB event counters.
 *
 * NR_SLUB_STAT_ITEMS must remain the last enumerator: it is the number
 * of counters, not a counter itself.
 */
enum stat_item {
	ALLOC_FASTPATH,		/* Allocation from cpu slab */
	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
	FREE_FASTPATH,		/* Free to cpu slab */
	FREE_SLOWPATH,		/* Freeing not to cpu slab */
	FREE_FROZEN,		/* Freeing to frozen slab */
	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from partial list */
	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
	FREE_SLAB,		/* Slab freed to the page allocator */
	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
	NR_SLUB_STAT_ITEMS
};
33 | |||
/*
 * Per-cpu allocation state of a slab cache.
 */
struct kmem_cache_cpu {
	void **freelist;	/* First free per cpu object */
	struct page *page;	/* Slab that allocations are served from */
	int node;		/* Node of that page (or -1 for debug) */
	unsigned int offset;	/* Free pointer offset, in word units */
	unsigned int objsize;	/* Object size (taken from kmem_cache) */
#ifdef CONFIG_SLUB_STATS
	/* Event counters, one slot per enum stat_item value */
	unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
21 | 44 | ||
22 | struct kmem_cache_node { | 45 | struct kmem_cache_node { |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0d385be682db..4f4008fc73e4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -205,6 +205,19 @@ config SLUB_DEBUG_ON | |||
205 | off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying | 205 | off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying |
206 | "slub_debug=-". | 206 | "slub_debug=-". |
207 | 207 | ||
208 | config SLUB_STATS | ||
209 | default n | ||
210 | bool "Enable SLUB performance statistics" | ||
211 | depends on SLUB | ||
212 | help | ||
213 | SLUB statistics are useful to debug SLUB's allocation behavior in | ||
214 | order to find ways to optimize the allocator. This should never be | ||
215 | enabled for production use since keeping statistics slows down | ||
216 | the allocator by a few percentage points. The slabinfo command | ||
217 | supports the determination of the most active slabs to figure | ||
218 | out which slabs are relevant to a particular load. | ||
219 | Try running: slabinfo -DA | ||
220 | |||
208 | config DEBUG_PREEMPT | 221 | config DEBUG_PREEMPT |
209 | bool "Debug preemptible kernel" | 222 | bool "Debug preemptible kernel" |
210 | depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) | 223 | depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) |
@@ -149,6 +149,13 @@ static inline void ClearSlabDebug(struct page *page) | |||
149 | /* Enable to test recovery from slab corruption on boot */ | 149 | /* Enable to test recovery from slab corruption on boot */ |
150 | #undef SLUB_RESILIENCY_TEST | 150 | #undef SLUB_RESILIENCY_TEST |
151 | 151 | ||
152 | /* | ||
153 | * Currently fastpath is not supported if preemption is enabled. | ||
154 | */ | ||
155 | #if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT) | ||
156 | #define SLUB_FASTPATH | ||
157 | #endif | ||
158 | |||
152 | #if PAGE_SHIFT <= 12 | 159 | #if PAGE_SHIFT <= 12 |
153 | 160 | ||
154 | /* | 161 | /* |
@@ -243,6 +250,7 @@ enum track_item { TRACK_ALLOC, TRACK_FREE }; | |||
243 | static int sysfs_slab_add(struct kmem_cache *); | 250 | static int sysfs_slab_add(struct kmem_cache *); |
244 | static int sysfs_slab_alias(struct kmem_cache *, const char *); | 251 | static int sysfs_slab_alias(struct kmem_cache *, const char *); |
245 | static void sysfs_slab_remove(struct kmem_cache *); | 252 | static void sysfs_slab_remove(struct kmem_cache *); |
253 | |||
246 | #else | 254 | #else |
247 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } | 255 | static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } |
248 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) | 256 | static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) |
@@ -251,8 +259,16 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) | |||
251 | { | 259 | { |
252 | kfree(s); | 260 | kfree(s); |
253 | } | 261 | } |
262 | |||
254 | #endif | 263 | #endif |
255 | 264 | ||
265 | static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) | ||
266 | { | ||
267 | #ifdef CONFIG_SLUB_STATS | ||
268 | c->stat[si]++; | ||
269 | #endif | ||
270 | } | ||
271 | |||
256 | /******************************************************************** | 272 | /******************************************************************** |
257 | * Core slab cache functions | 273 | * Core slab cache functions |
258 | *******************************************************************/ | 274 | *******************************************************************/ |
@@ -280,15 +296,32 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | |||
280 | #endif | 296 | #endif |
281 | } | 297 | } |
282 | 298 | ||
299 | /* | ||
300 | * The end pointer in a slab is special. It points to the first object in the | ||
301 | * slab but has bit 0 set to mark it. | ||
302 | * | ||
303 | * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 | ||
304 | * in the mapping set. | ||
305 | */ | ||
306 | static inline int is_end(void *addr) | ||
307 | { | ||
308 | return (unsigned long)addr & PAGE_MAPPING_ANON; | ||
309 | } | ||
310 | |||
311 | void *slab_address(struct page *page) | ||
312 | { | ||
313 | return page->end - PAGE_MAPPING_ANON; | ||
314 | } | ||
315 | |||
283 | static inline int check_valid_pointer(struct kmem_cache *s, | 316 | static inline int check_valid_pointer(struct kmem_cache *s, |
284 | struct page *page, const void *object) | 317 | struct page *page, const void *object) |
285 | { | 318 | { |
286 | void *base; | 319 | void *base; |
287 | 320 | ||
288 | if (!object) | 321 | if (object == page->end) |
289 | return 1; | 322 | return 1; |
290 | 323 | ||
291 | base = page_address(page); | 324 | base = slab_address(page); |
292 | if (object < base || object >= base + s->objects * s->size || | 325 | if (object < base || object >= base + s->objects * s->size || |
293 | (object - base) % s->size) { | 326 | (object - base) % s->size) { |
294 | return 0; | 327 | return 0; |
@@ -321,7 +354,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) | |||
321 | 354 | ||
/*
 * Scan a freelist: __p starts at __free and follows the free pointers
 * of cache __s until the slab's end marker is reached.
 *
 * NOTE: the termination test reads page->end, so a variable named
 * "page" that refers to the slab being scanned must be in scope
 * wherever this macro is used.
 */
#define for_each_free_object(__p, __s, __free) \
	for ((__p) = (__free); (__p) != page->end; \
			(__p) = get_freepointer((__s), (__p)))
325 | 359 | ||
326 | /* Determine object index from a given position */ | 360 | /* Determine object index from a given position */ |
327 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) | 361 | static inline int slab_index(void *p, struct kmem_cache *s, void *addr) |
@@ -473,7 +507,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) | |||
473 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) | 507 | static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) |
474 | { | 508 | { |
475 | unsigned int off; /* Offset of last byte */ | 509 | unsigned int off; /* Offset of last byte */ |
476 | u8 *addr = page_address(page); | 510 | u8 *addr = slab_address(page); |
477 | 511 | ||
478 | print_tracking(s, p); | 512 | print_tracking(s, p); |
479 | 513 | ||
@@ -651,7 +685,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) | |||
651 | if (!(s->flags & SLAB_POISON)) | 685 | if (!(s->flags & SLAB_POISON)) |
652 | return 1; | 686 | return 1; |
653 | 687 | ||
654 | start = page_address(page); | 688 | start = slab_address(page); |
655 | end = start + (PAGE_SIZE << s->order); | 689 | end = start + (PAGE_SIZE << s->order); |
656 | length = s->objects * s->size; | 690 | length = s->objects * s->size; |
657 | remainder = end - (start + length); | 691 | remainder = end - (start + length); |
@@ -685,9 +719,10 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
685 | endobject, red, s->inuse - s->objsize)) | 719 | endobject, red, s->inuse - s->objsize)) |
686 | return 0; | 720 | return 0; |
687 | } else { | 721 | } else { |
688 | if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) | 722 | if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) { |
689 | check_bytes_and_report(s, page, p, "Alignment padding", endobject, | 723 | check_bytes_and_report(s, page, p, "Alignment padding", |
690 | POISON_INUSE, s->inuse - s->objsize); | 724 | endobject, POISON_INUSE, s->inuse - s->objsize); |
725 | } | ||
691 | } | 726 | } |
692 | 727 | ||
693 | if (s->flags & SLAB_POISON) { | 728 | if (s->flags & SLAB_POISON) { |
@@ -718,7 +753,7 @@ static int check_object(struct kmem_cache *s, struct page *page, | |||
718 | * of the free objects in this slab. May cause | 753 | * of the free objects in this slab. May cause |
719 | * another error because the object count is now wrong. | 754 | * another error because the object count is now wrong. |
720 | */ | 755 | */ |
721 | set_freepointer(s, p, NULL); | 756 | set_freepointer(s, p, page->end); |
722 | return 0; | 757 | return 0; |
723 | } | 758 | } |
724 | return 1; | 759 | return 1; |
@@ -752,18 +787,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) | |||
752 | void *fp = page->freelist; | 787 | void *fp = page->freelist; |
753 | void *object = NULL; | 788 | void *object = NULL; |
754 | 789 | ||
755 | while (fp && nr <= s->objects) { | 790 | while (fp != page->end && nr <= s->objects) { |
756 | if (fp == search) | 791 | if (fp == search) |
757 | return 1; | 792 | return 1; |
758 | if (!check_valid_pointer(s, page, fp)) { | 793 | if (!check_valid_pointer(s, page, fp)) { |
759 | if (object) { | 794 | if (object) { |
760 | object_err(s, page, object, | 795 | object_err(s, page, object, |
761 | "Freechain corrupt"); | 796 | "Freechain corrupt"); |
762 | set_freepointer(s, object, NULL); | 797 | set_freepointer(s, object, page->end); |
763 | break; | 798 | break; |
764 | } else { | 799 | } else { |
765 | slab_err(s, page, "Freepointer corrupt"); | 800 | slab_err(s, page, "Freepointer corrupt"); |
766 | page->freelist = NULL; | 801 | page->freelist = page->end; |
767 | page->inuse = s->objects; | 802 | page->inuse = s->objects; |
768 | slab_fix(s, "Freelist cleared"); | 803 | slab_fix(s, "Freelist cleared"); |
769 | return 0; | 804 | return 0; |
@@ -869,7 +904,7 @@ bad: | |||
869 | */ | 904 | */ |
870 | slab_fix(s, "Marking all objects used"); | 905 | slab_fix(s, "Marking all objects used"); |
871 | page->inuse = s->objects; | 906 | page->inuse = s->objects; |
872 | page->freelist = NULL; | 907 | page->freelist = page->end; |
873 | } | 908 | } |
874 | return 0; | 909 | return 0; |
875 | } | 910 | } |
@@ -894,11 +929,10 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
894 | return 0; | 929 | return 0; |
895 | 930 | ||
896 | if (unlikely(s != page->slab)) { | 931 | if (unlikely(s != page->slab)) { |
897 | if (!PageSlab(page)) | 932 | if (!PageSlab(page)) { |
898 | slab_err(s, page, "Attempt to free object(0x%p) " | 933 | slab_err(s, page, "Attempt to free object(0x%p) " |
899 | "outside of slab", object); | 934 | "outside of slab", object); |
900 | else | 935 | } else if (!page->slab) { |
901 | if (!page->slab) { | ||
902 | printk(KERN_ERR | 936 | printk(KERN_ERR |
903 | "SLUB <none>: no slab for object 0x%p.\n", | 937 | "SLUB <none>: no slab for object 0x%p.\n", |
904 | object); | 938 | object); |
@@ -910,7 +944,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, | |||
910 | } | 944 | } |
911 | 945 | ||
912 | /* Special debug activities for freeing objects */ | 946 | /* Special debug activities for freeing objects */ |
913 | if (!SlabFrozen(page) && !page->freelist) | 947 | if (!SlabFrozen(page) && page->freelist == page->end) |
914 | remove_full(s, page); | 948 | remove_full(s, page); |
915 | if (s->flags & SLAB_STORE_USER) | 949 | if (s->flags & SLAB_STORE_USER) |
916 | set_track(s, object, TRACK_FREE, addr); | 950 | set_track(s, object, TRACK_FREE, addr); |
@@ -1007,7 +1041,7 @@ static unsigned long kmem_cache_flags(unsigned long objsize, | |||
1007 | */ | 1041 | */ |
1008 | if (slub_debug && (!slub_debug_slabs || | 1042 | if (slub_debug && (!slub_debug_slabs || |
1009 | strncmp(slub_debug_slabs, name, | 1043 | strncmp(slub_debug_slabs, name, |
1010 | strlen(slub_debug_slabs)) == 0)) | 1044 | strlen(slub_debug_slabs)) == 0)) |
1011 | flags |= slub_debug; | 1045 | flags |= slub_debug; |
1012 | } | 1046 | } |
1013 | 1047 | ||
@@ -1102,6 +1136,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1102 | SetSlabDebug(page); | 1136 | SetSlabDebug(page); |
1103 | 1137 | ||
1104 | start = page_address(page); | 1138 | start = page_address(page); |
1139 | page->end = start + 1; | ||
1105 | 1140 | ||
1106 | if (unlikely(s->flags & SLAB_POISON)) | 1141 | if (unlikely(s->flags & SLAB_POISON)) |
1107 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); | 1142 | memset(start, POISON_INUSE, PAGE_SIZE << s->order); |
@@ -1113,7 +1148,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1113 | last = p; | 1148 | last = p; |
1114 | } | 1149 | } |
1115 | setup_object(s, page, last); | 1150 | setup_object(s, page, last); |
1116 | set_freepointer(s, last, NULL); | 1151 | set_freepointer(s, last, page->end); |
1117 | 1152 | ||
1118 | page->freelist = start; | 1153 | page->freelist = start; |
1119 | page->inuse = 0; | 1154 | page->inuse = 0; |
@@ -1129,7 +1164,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1129 | void *p; | 1164 | void *p; |
1130 | 1165 | ||
1131 | slab_pad_check(s, page); | 1166 | slab_pad_check(s, page); |
1132 | for_each_object(p, s, page_address(page)) | 1167 | for_each_object(p, s, slab_address(page)) |
1133 | check_object(s, page, p, 0); | 1168 | check_object(s, page, p, 0); |
1134 | ClearSlabDebug(page); | 1169 | ClearSlabDebug(page); |
1135 | } | 1170 | } |
@@ -1139,6 +1174,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) | |||
1139 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, | 1174 | NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, |
1140 | -pages); | 1175 | -pages); |
1141 | 1176 | ||
1177 | page->mapping = NULL; | ||
1142 | __free_pages(page, s->order); | 1178 | __free_pages(page, s->order); |
1143 | } | 1179 | } |
1144 | 1180 | ||
@@ -1183,7 +1219,7 @@ static __always_inline void slab_lock(struct page *page) | |||
1183 | 1219 | ||
1184 | static __always_inline void slab_unlock(struct page *page) | 1220 | static __always_inline void slab_unlock(struct page *page) |
1185 | { | 1221 | { |
1186 | bit_spin_unlock(PG_locked, &page->flags); | 1222 | __bit_spin_unlock(PG_locked, &page->flags); |
1187 | } | 1223 | } |
1188 | 1224 | ||
1189 | static __always_inline int slab_trylock(struct page *page) | 1225 | static __always_inline int slab_trylock(struct page *page) |
@@ -1294,8 +1330,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) | |||
1294 | get_cycles() % 1024 > s->remote_node_defrag_ratio) | 1330 | get_cycles() % 1024 > s->remote_node_defrag_ratio) |
1295 | return NULL; | 1331 | return NULL; |
1296 | 1332 | ||
1297 | zonelist = &NODE_DATA(slab_node(current->mempolicy)) | 1333 | zonelist = &NODE_DATA( |
1298 | ->node_zonelists[gfp_zone(flags)]; | 1334 | slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)]; |
1299 | for (z = zonelist->zones; *z; z++) { | 1335 | for (z = zonelist->zones; *z; z++) { |
1300 | struct kmem_cache_node *n; | 1336 | struct kmem_cache_node *n; |
1301 | 1337 | ||
@@ -1337,17 +1373,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) | |||
1337 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | 1373 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) |
1338 | { | 1374 | { |
1339 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1375 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
1376 | struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); | ||
1340 | 1377 | ||
1341 | ClearSlabFrozen(page); | 1378 | ClearSlabFrozen(page); |
1342 | if (page->inuse) { | 1379 | if (page->inuse) { |
1343 | 1380 | ||
1344 | if (page->freelist) | 1381 | if (page->freelist != page->end) { |
1345 | add_partial(n, page, tail); | 1382 | add_partial(n, page, tail); |
1346 | else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) | 1383 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); |
1347 | add_full(n, page); | 1384 | } else { |
1385 | stat(c, DEACTIVATE_FULL); | ||
1386 | if (SlabDebug(page) && (s->flags & SLAB_STORE_USER)) | ||
1387 | add_full(n, page); | ||
1388 | } | ||
1348 | slab_unlock(page); | 1389 | slab_unlock(page); |
1349 | |||
1350 | } else { | 1390 | } else { |
1391 | stat(c, DEACTIVATE_EMPTY); | ||
1351 | if (n->nr_partial < MIN_PARTIAL) { | 1392 | if (n->nr_partial < MIN_PARTIAL) { |
1352 | /* | 1393 | /* |
1353 | * Adding an empty slab to the partial slabs in order | 1394 | * Adding an empty slab to the partial slabs in order |
@@ -1361,6 +1402,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1361 | slab_unlock(page); | 1402 | slab_unlock(page); |
1362 | } else { | 1403 | } else { |
1363 | slab_unlock(page); | 1404 | slab_unlock(page); |
1405 | stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); | ||
1364 | discard_slab(s, page); | 1406 | discard_slab(s, page); |
1365 | } | 1407 | } |
1366 | } | 1408 | } |
@@ -1373,12 +1415,19 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1373 | { | 1415 | { |
1374 | struct page *page = c->page; | 1416 | struct page *page = c->page; |
1375 | int tail = 1; | 1417 | int tail = 1; |
1418 | |||
1419 | if (c->freelist) | ||
1420 | stat(c, DEACTIVATE_REMOTE_FREES); | ||
1376 | /* | 1421 | /* |
1377 | * Merge cpu freelist into freelist. Typically we get here | 1422 | * Merge cpu freelist into freelist. Typically we get here |
1378 | * because both freelists are empty. So this is unlikely | 1423 | * because both freelists are empty. So this is unlikely |
1379 | * to occur. | 1424 | * to occur. |
1425 | * | ||
1426 | * We need to use _is_end here because deactivate slab may | ||
1427 | * be called for a debug slab. Then c->freelist may contain | ||
1428 | * a dummy pointer. | ||
1380 | */ | 1429 | */ |
1381 | while (unlikely(c->freelist)) { | 1430 | while (unlikely(!is_end(c->freelist))) { |
1382 | void **object; | 1431 | void **object; |
1383 | 1432 | ||
1384 | tail = 0; /* Hot objects. Put the slab first */ | 1433 | tail = 0; /* Hot objects. Put the slab first */ |
@@ -1398,6 +1447,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1398 | 1447 | ||
1399 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 1448 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1400 | { | 1449 | { |
1450 | stat(c, CPUSLAB_FLUSH); | ||
1401 | slab_lock(c->page); | 1451 | slab_lock(c->page); |
1402 | deactivate_slab(s, c); | 1452 | deactivate_slab(s, c); |
1403 | } | 1453 | } |
@@ -1469,16 +1519,21 @@ static void *__slab_alloc(struct kmem_cache *s, | |||
1469 | { | 1519 | { |
1470 | void **object; | 1520 | void **object; |
1471 | struct page *new; | 1521 | struct page *new; |
1522 | #ifdef SLUB_FASTPATH | ||
1523 | unsigned long flags; | ||
1472 | 1524 | ||
1525 | local_irq_save(flags); | ||
1526 | #endif | ||
1473 | if (!c->page) | 1527 | if (!c->page) |
1474 | goto new_slab; | 1528 | goto new_slab; |
1475 | 1529 | ||
1476 | slab_lock(c->page); | 1530 | slab_lock(c->page); |
1477 | if (unlikely(!node_match(c, node))) | 1531 | if (unlikely(!node_match(c, node))) |
1478 | goto another_slab; | 1532 | goto another_slab; |
1533 | stat(c, ALLOC_REFILL); | ||
1479 | load_freelist: | 1534 | load_freelist: |
1480 | object = c->page->freelist; | 1535 | object = c->page->freelist; |
1481 | if (unlikely(!object)) | 1536 | if (unlikely(object == c->page->end)) |
1482 | goto another_slab; | 1537 | goto another_slab; |
1483 | if (unlikely(SlabDebug(c->page))) | 1538 | if (unlikely(SlabDebug(c->page))) |
1484 | goto debug; | 1539 | goto debug; |
@@ -1486,9 +1541,15 @@ load_freelist: | |||
1486 | object = c->page->freelist; | 1541 | object = c->page->freelist; |
1487 | c->freelist = object[c->offset]; | 1542 | c->freelist = object[c->offset]; |
1488 | c->page->inuse = s->objects; | 1543 | c->page->inuse = s->objects; |
1489 | c->page->freelist = NULL; | 1544 | c->page->freelist = c->page->end; |
1490 | c->node = page_to_nid(c->page); | 1545 | c->node = page_to_nid(c->page); |
1546 | unlock_out: | ||
1491 | slab_unlock(c->page); | 1547 | slab_unlock(c->page); |
1548 | stat(c, ALLOC_SLOWPATH); | ||
1549 | out: | ||
1550 | #ifdef SLUB_FASTPATH | ||
1551 | local_irq_restore(flags); | ||
1552 | #endif | ||
1492 | return object; | 1553 | return object; |
1493 | 1554 | ||
1494 | another_slab: | 1555 | another_slab: |
@@ -1498,6 +1559,7 @@ new_slab: | |||
1498 | new = get_partial(s, gfpflags, node); | 1559 | new = get_partial(s, gfpflags, node); |
1499 | if (new) { | 1560 | if (new) { |
1500 | c->page = new; | 1561 | c->page = new; |
1562 | stat(c, ALLOC_FROM_PARTIAL); | ||
1501 | goto load_freelist; | 1563 | goto load_freelist; |
1502 | } | 1564 | } |
1503 | 1565 | ||
@@ -1511,6 +1573,7 @@ new_slab: | |||
1511 | 1573 | ||
1512 | if (new) { | 1574 | if (new) { |
1513 | c = get_cpu_slab(s, smp_processor_id()); | 1575 | c = get_cpu_slab(s, smp_processor_id()); |
1576 | stat(c, ALLOC_SLAB); | ||
1514 | if (c->page) | 1577 | if (c->page) |
1515 | flush_slab(s, c); | 1578 | flush_slab(s, c); |
1516 | slab_lock(new); | 1579 | slab_lock(new); |
@@ -1518,7 +1581,8 @@ new_slab: | |||
1518 | c->page = new; | 1581 | c->page = new; |
1519 | goto load_freelist; | 1582 | goto load_freelist; |
1520 | } | 1583 | } |
1521 | return NULL; | 1584 | object = NULL; |
1585 | goto out; | ||
1522 | debug: | 1586 | debug: |
1523 | object = c->page->freelist; | 1587 | object = c->page->freelist; |
1524 | if (!alloc_debug_processing(s, c->page, object, addr)) | 1588 | if (!alloc_debug_processing(s, c->page, object, addr)) |
@@ -1527,8 +1591,7 @@ debug: | |||
1527 | c->page->inuse++; | 1591 | c->page->inuse++; |
1528 | c->page->freelist = object[c->offset]; | 1592 | c->page->freelist = object[c->offset]; |
1529 | c->node = -1; | 1593 | c->node = -1; |
1530 | slab_unlock(c->page); | 1594 | goto unlock_out; |
1531 | return object; | ||
1532 | } | 1595 | } |
1533 | 1596 | ||
1534 | /* | 1597 | /* |
@@ -1545,20 +1608,50 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1545 | gfp_t gfpflags, int node, void *addr) | 1608 | gfp_t gfpflags, int node, void *addr) |
1546 | { | 1609 | { |
1547 | void **object; | 1610 | void **object; |
1548 | unsigned long flags; | ||
1549 | struct kmem_cache_cpu *c; | 1611 | struct kmem_cache_cpu *c; |
1550 | 1612 | ||
1613 | /* | ||
1614 | * The SLUB_FASTPATH path is provisional and is currently disabled if the | ||
1615 | * kernel is compiled with preemption or if the arch does not support | ||
1616 | * fast cmpxchg operations. There are a couple of coming changes that will | ||
1617 | * simplify matters and allow preemption. Ultimately we may end up making | ||
1618 | * SLUB_FASTPATH the default. | ||
1619 | * | ||
1620 | * 1. The introduction of the per cpu allocator will avoid array lookups | ||
1621 | * through get_cpu_slab(). A special register can be used instead. | ||
1622 | * | ||
1623 | * 2. The introduction of per cpu atomic operations (cpu_ops) means that | ||
1624 | * we can realize the logic here entirely with per cpu atomics. The | ||
1625 | * per cpu atomic ops will take care of the preemption issues. | ||
1626 | */ | ||
1627 | |||
1628 | #ifdef SLUB_FASTPATH | ||
1629 | c = get_cpu_slab(s, raw_smp_processor_id()); | ||
1630 | do { | ||
1631 | object = c->freelist; | ||
1632 | if (unlikely(is_end(object) || !node_match(c, node))) { | ||
1633 | object = __slab_alloc(s, gfpflags, node, addr, c); | ||
1634 | break; | ||
1635 | } | ||
1636 | stat(c, ALLOC_FASTPATH); | ||
1637 | } while (cmpxchg_local(&c->freelist, object, object[c->offset]) | ||
1638 | != object); | ||
1639 | #else | ||
1640 | unsigned long flags; | ||
1641 | |||
1551 | local_irq_save(flags); | 1642 | local_irq_save(flags); |
1552 | c = get_cpu_slab(s, smp_processor_id()); | 1643 | c = get_cpu_slab(s, smp_processor_id()); |
1553 | if (unlikely(!c->freelist || !node_match(c, node))) | 1644 | if (unlikely(is_end(c->freelist) || !node_match(c, node))) |
1554 | 1645 | ||
1555 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1646 | object = __slab_alloc(s, gfpflags, node, addr, c); |
1556 | 1647 | ||
1557 | else { | 1648 | else { |
1558 | object = c->freelist; | 1649 | object = c->freelist; |
1559 | c->freelist = object[c->offset]; | 1650 | c->freelist = object[c->offset]; |
1651 | stat(c, ALLOC_FASTPATH); | ||
1560 | } | 1652 | } |
1561 | local_irq_restore(flags); | 1653 | local_irq_restore(flags); |
1654 | #endif | ||
1562 | 1655 | ||
1563 | if (unlikely((gfpflags & __GFP_ZERO) && object)) | 1656 | if (unlikely((gfpflags & __GFP_ZERO) && object)) |
1564 | memset(object, 0, c->objsize); | 1657 | memset(object, 0, c->objsize); |
@@ -1593,7 +1686,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page, | |||
1593 | { | 1686 | { |
1594 | void *prior; | 1687 | void *prior; |
1595 | void **object = (void *)x; | 1688 | void **object = (void *)x; |
1689 | struct kmem_cache_cpu *c; | ||
1690 | |||
1691 | #ifdef SLUB_FASTPATH | ||
1692 | unsigned long flags; | ||
1596 | 1693 | ||
1694 | local_irq_save(flags); | ||
1695 | #endif | ||
1696 | c = get_cpu_slab(s, raw_smp_processor_id()); | ||
1697 | stat(c, FREE_SLOWPATH); | ||
1597 | slab_lock(page); | 1698 | slab_lock(page); |
1598 | 1699 | ||
1599 | if (unlikely(SlabDebug(page))) | 1700 | if (unlikely(SlabDebug(page))) |
@@ -1603,8 +1704,10 @@ checks_ok: | |||
1603 | page->freelist = object; | 1704 | page->freelist = object; |
1604 | page->inuse--; | 1705 | page->inuse--; |
1605 | 1706 | ||
1606 | if (unlikely(SlabFrozen(page))) | 1707 | if (unlikely(SlabFrozen(page))) { |
1708 | stat(c, FREE_FROZEN); | ||
1607 | goto out_unlock; | 1709 | goto out_unlock; |
1710 | } | ||
1608 | 1711 | ||
1609 | if (unlikely(!page->inuse)) | 1712 | if (unlikely(!page->inuse)) |
1610 | goto slab_empty; | 1713 | goto slab_empty; |
@@ -1614,21 +1717,31 @@ checks_ok: | |||
1614 | * was not on the partial list before | 1717 | * was not on the partial list before |
1615 | * then add it. | 1718 | * then add it. |
1616 | */ | 1719 | */ |
1617 | if (unlikely(!prior)) | 1720 | if (unlikely(prior == page->end)) { |
1618 | add_partial(get_node(s, page_to_nid(page)), page, 1); | 1721 | add_partial(get_node(s, page_to_nid(page)), page, 1); |
1722 | stat(c, FREE_ADD_PARTIAL); | ||
1723 | } | ||
1619 | 1724 | ||
1620 | out_unlock: | 1725 | out_unlock: |
1621 | slab_unlock(page); | 1726 | slab_unlock(page); |
1727 | #ifdef SLUB_FASTPATH | ||
1728 | local_irq_restore(flags); | ||
1729 | #endif | ||
1622 | return; | 1730 | return; |
1623 | 1731 | ||
1624 | slab_empty: | 1732 | slab_empty: |
1625 | if (prior) | 1733 | if (prior != page->end) { |
1626 | /* | 1734 | /* |
1627 | * Slab still on the partial list. | 1735 | * Slab still on the partial list. |
1628 | */ | 1736 | */ |
1629 | remove_partial(s, page); | 1737 | remove_partial(s, page); |
1630 | 1738 | stat(c, FREE_REMOVE_PARTIAL); | |
1739 | } | ||
1631 | slab_unlock(page); | 1740 | slab_unlock(page); |
1741 | stat(c, FREE_SLAB); | ||
1742 | #ifdef SLUB_FASTPATH | ||
1743 | local_irq_restore(flags); | ||
1744 | #endif | ||
1632 | discard_slab(s, page); | 1745 | discard_slab(s, page); |
1633 | return; | 1746 | return; |
1634 | 1747 | ||
@@ -1653,19 +1766,49 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
1653 | struct page *page, void *x, void *addr) | 1766 | struct page *page, void *x, void *addr) |
1654 | { | 1767 | { |
1655 | void **object = (void *)x; | 1768 | void **object = (void *)x; |
1656 | unsigned long flags; | ||
1657 | struct kmem_cache_cpu *c; | 1769 | struct kmem_cache_cpu *c; |
1658 | 1770 | ||
1771 | #ifdef SLUB_FASTPATH | ||
1772 | void **freelist; | ||
1773 | |||
1774 | c = get_cpu_slab(s, raw_smp_processor_id()); | ||
1775 | debug_check_no_locks_freed(object, s->objsize); | ||
1776 | do { | ||
1777 | freelist = c->freelist; | ||
1778 | barrier(); | ||
1779 | /* | ||
1780 | * If the compiler would reorder the retrieval of c->page to | ||
1781 | * come before c->freelist then an interrupt could | ||
1782 | * change the cpu slab before we retrieve c->freelist. We | ||
1783 | * could be matching on a page no longer active and put the | ||
1784 | * object onto the freelist of the wrong slab. | ||
1785 | * | ||
1786 | * On the other hand: If we already have the freelist pointer | ||
1787 | * then any change of cpu_slab will cause the cmpxchg to fail | ||
1788 | * since the freelist pointers are unique per slab. | ||
1789 | */ | ||
1790 | if (unlikely(page != c->page || c->node < 0)) { | ||
1791 | __slab_free(s, page, x, addr, c->offset); | ||
1792 | break; | ||
1793 | } | ||
1794 | object[c->offset] = freelist; | ||
1795 | stat(c, FREE_FASTPATH); | ||
1796 | } while (cmpxchg_local(&c->freelist, freelist, object) != freelist); | ||
1797 | #else | ||
1798 | unsigned long flags; | ||
1799 | |||
1659 | local_irq_save(flags); | 1800 | local_irq_save(flags); |
1660 | debug_check_no_locks_freed(object, s->objsize); | 1801 | debug_check_no_locks_freed(object, s->objsize); |
1661 | c = get_cpu_slab(s, smp_processor_id()); | 1802 | c = get_cpu_slab(s, smp_processor_id()); |
1662 | if (likely(page == c->page && c->node >= 0)) { | 1803 | if (likely(page == c->page && c->node >= 0)) { |
1663 | object[c->offset] = c->freelist; | 1804 | object[c->offset] = c->freelist; |
1664 | c->freelist = object; | 1805 | c->freelist = object; |
1806 | stat(c, FREE_FASTPATH); | ||
1665 | } else | 1807 | } else |
1666 | __slab_free(s, page, x, addr, c->offset); | 1808 | __slab_free(s, page, x, addr, c->offset); |
1667 | 1809 | ||
1668 | local_irq_restore(flags); | 1810 | local_irq_restore(flags); |
1811 | #endif | ||
1669 | } | 1812 | } |
1670 | 1813 | ||
1671 | void kmem_cache_free(struct kmem_cache *s, void *x) | 1814 | void kmem_cache_free(struct kmem_cache *s, void *x) |
@@ -1842,7 +1985,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, | |||
1842 | struct kmem_cache_cpu *c) | 1985 | struct kmem_cache_cpu *c) |
1843 | { | 1986 | { |
1844 | c->page = NULL; | 1987 | c->page = NULL; |
1845 | c->freelist = NULL; | 1988 | c->freelist = (void *)PAGE_MAPPING_ANON; |
1846 | c->node = 0; | 1989 | c->node = 0; |
1847 | c->offset = s->offset / sizeof(void *); | 1990 | c->offset = s->offset / sizeof(void *); |
1848 | c->objsize = s->objsize; | 1991 | c->objsize = s->objsize; |
@@ -2446,7 +2589,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
2446 | goto unlock_out; | 2589 | goto unlock_out; |
2447 | 2590 | ||
2448 | realsize = kmalloc_caches[index].objsize; | 2591 | realsize = kmalloc_caches[index].objsize; |
2449 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize), | 2592 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", |
2593 | (unsigned int)realsize); | ||
2450 | s = kmalloc(kmem_size, flags & ~SLUB_DMA); | 2594 | s = kmalloc(kmem_size, flags & ~SLUB_DMA); |
2451 | 2595 | ||
2452 | if (!s || !text || !kmem_cache_open(s, flags, text, | 2596 | if (!s || !text || !kmem_cache_open(s, flags, text, |
@@ -2601,6 +2745,7 @@ EXPORT_SYMBOL(ksize); | |||
2601 | void kfree(const void *x) | 2745 | void kfree(const void *x) |
2602 | { | 2746 | { |
2603 | struct page *page; | 2747 | struct page *page; |
2748 | void *object = (void *)x; | ||
2604 | 2749 | ||
2605 | if (unlikely(ZERO_OR_NULL_PTR(x))) | 2750 | if (unlikely(ZERO_OR_NULL_PTR(x))) |
2606 | return; | 2751 | return; |
@@ -2610,7 +2755,7 @@ void kfree(const void *x) | |||
2610 | put_page(page); | 2755 | put_page(page); |
2611 | return; | 2756 | return; |
2612 | } | 2757 | } |
2613 | slab_free(page->slab, page, (void *)x, __builtin_return_address(0)); | 2758 | slab_free(page->slab, page, object, __builtin_return_address(0)); |
2614 | } | 2759 | } |
2615 | EXPORT_SYMBOL(kfree); | 2760 | EXPORT_SYMBOL(kfree); |
2616 | 2761 | ||
@@ -2896,7 +3041,8 @@ void __init kmem_cache_init(void) | |||
2896 | #endif | 3041 | #endif |
2897 | 3042 | ||
2898 | 3043 | ||
2899 | printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | 3044 | printk(KERN_INFO |
3045 | "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," | ||
2900 | " CPUs=%d, Nodes=%d\n", | 3046 | " CPUs=%d, Nodes=%d\n", |
2901 | caches, cache_line_size(), | 3047 | caches, cache_line_size(), |
2902 | slub_min_order, slub_max_order, slub_min_objects, | 3048 | slub_min_order, slub_max_order, slub_min_objects, |
@@ -3063,7 +3209,7 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | |||
3063 | } | 3209 | } |
3064 | 3210 | ||
3065 | static struct notifier_block __cpuinitdata slab_notifier = { | 3211 | static struct notifier_block __cpuinitdata slab_notifier = { |
3066 | &slab_cpuup_callback, NULL, 0 | 3212 | .notifier_call = slab_cpuup_callback |
3067 | }; | 3213 | }; |
3068 | 3214 | ||
3069 | #endif | 3215 | #endif |
@@ -3104,7 +3250,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
3104 | unsigned long *map) | 3250 | unsigned long *map) |
3105 | { | 3251 | { |
3106 | void *p; | 3252 | void *p; |
3107 | void *addr = page_address(page); | 3253 | void *addr = slab_address(page); |
3108 | 3254 | ||
3109 | if (!check_slab(s, page) || | 3255 | if (!check_slab(s, page) || |
3110 | !on_freelist(s, page, NULL)) | 3256 | !on_freelist(s, page, NULL)) |
@@ -3221,8 +3367,9 @@ static void resiliency_test(void) | |||
3221 | p = kzalloc(32, GFP_KERNEL); | 3367 | p = kzalloc(32, GFP_KERNEL); |
3222 | p[32 + sizeof(void *)] = 0x34; | 3368 | p[32 + sizeof(void *)] = 0x34; |
3223 | printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" | 3369 | printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" |
3224 | " 0x34 -> -0x%p\n", p); | 3370 | " 0x34 -> -0x%p\n", p); |
3225 | printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); | 3371 | printk(KERN_ERR |
3372 | "If allocated object is overwritten then not detectable\n\n"); | ||
3226 | 3373 | ||
3227 | validate_slab_cache(kmalloc_caches + 5); | 3374 | validate_slab_cache(kmalloc_caches + 5); |
3228 | p = kzalloc(64, GFP_KERNEL); | 3375 | p = kzalloc(64, GFP_KERNEL); |
@@ -3230,7 +3377,8 @@ static void resiliency_test(void) | |||
3230 | *p = 0x56; | 3377 | *p = 0x56; |
3231 | printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", | 3378 | printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", |
3232 | p); | 3379 | p); |
3233 | printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n"); | 3380 | printk(KERN_ERR |
3381 | "If allocated object is overwritten then not detectable\n\n"); | ||
3234 | validate_slab_cache(kmalloc_caches + 6); | 3382 | validate_slab_cache(kmalloc_caches + 6); |
3235 | 3383 | ||
3236 | printk(KERN_ERR "\nB. Corruption after free\n"); | 3384 | printk(KERN_ERR "\nB. Corruption after free\n"); |
@@ -3243,7 +3391,8 @@ static void resiliency_test(void) | |||
3243 | p = kzalloc(256, GFP_KERNEL); | 3391 | p = kzalloc(256, GFP_KERNEL); |
3244 | kfree(p); | 3392 | kfree(p); |
3245 | p[50] = 0x9a; | 3393 | p[50] = 0x9a; |
3246 | printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); | 3394 | printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", |
3395 | p); | ||
3247 | validate_slab_cache(kmalloc_caches + 8); | 3396 | validate_slab_cache(kmalloc_caches + 8); |
3248 | 3397 | ||
3249 | p = kzalloc(512, GFP_KERNEL); | 3398 | p = kzalloc(512, GFP_KERNEL); |
@@ -3384,7 +3533,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, | |||
3384 | static void process_slab(struct loc_track *t, struct kmem_cache *s, | 3533 | static void process_slab(struct loc_track *t, struct kmem_cache *s, |
3385 | struct page *page, enum track_item alloc) | 3534 | struct page *page, enum track_item alloc) |
3386 | { | 3535 | { |
3387 | void *addr = page_address(page); | 3536 | void *addr = slab_address(page); |
3388 | DECLARE_BITMAP(map, s->objects); | 3537 | DECLARE_BITMAP(map, s->objects); |
3389 | void *p; | 3538 | void *p; |
3390 | 3539 | ||
@@ -3872,6 +4021,62 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s, | |||
3872 | SLAB_ATTR(remote_node_defrag_ratio); | 4021 | SLAB_ATTR(remote_node_defrag_ratio); |
3873 | #endif | 4022 | #endif |
3874 | 4023 | ||
4024 | #ifdef CONFIG_SLUB_STATS | ||
4025 | |||
4026 | static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | ||
4027 | { | ||
4028 | unsigned long sum = 0; | ||
4029 | int cpu; | ||
4030 | int len; | ||
4031 | int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); | ||
4032 | |||
4033 | if (!data) | ||
4034 | return -ENOMEM; | ||
4035 | |||
4036 | for_each_online_cpu(cpu) { | ||
4037 | unsigned x = get_cpu_slab(s, cpu)->stat[si]; | ||
4038 | |||
4039 | data[cpu] = x; | ||
4040 | sum += x; | ||
4041 | } | ||
4042 | |||
4043 | len = sprintf(buf, "%lu", sum); | ||
4044 | |||
4045 | for_each_online_cpu(cpu) { | ||
4046 | if (data[cpu] && len < PAGE_SIZE - 20) | ||
4047 | len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]); | ||
4048 | } | ||
4049 | kfree(data); | ||
4050 | return len + sprintf(buf + len, "\n"); | ||
4051 | } | ||
4052 | |||
4053 | #define STAT_ATTR(si, text) \ | ||
4054 | static ssize_t text##_show(struct kmem_cache *s, char *buf) \ | ||
4055 | { \ | ||
4056 | return show_stat(s, buf, si); \ | ||
4057 | } \ | ||
4058 | SLAB_ATTR_RO(text); \ | ||
4059 | |||
4060 | STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); | ||
4061 | STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); | ||
4062 | STAT_ATTR(FREE_FASTPATH, free_fastpath); | ||
4063 | STAT_ATTR(FREE_SLOWPATH, free_slowpath); | ||
4064 | STAT_ATTR(FREE_FROZEN, free_frozen); | ||
4065 | STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial); | ||
4066 | STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); | ||
4067 | STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); | ||
4068 | STAT_ATTR(ALLOC_SLAB, alloc_slab); | ||
4069 | STAT_ATTR(ALLOC_REFILL, alloc_refill); | ||
4070 | STAT_ATTR(FREE_SLAB, free_slab); | ||
4071 | STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); | ||
4072 | STAT_ATTR(DEACTIVATE_FULL, deactivate_full); | ||
4073 | STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); | ||
4074 | STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); | ||
4075 | STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); | ||
4076 | STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); | ||
4077 | |||
4078 | #endif | ||
4079 | |||
3875 | static struct attribute *slab_attrs[] = { | 4080 | static struct attribute *slab_attrs[] = { |
3876 | &slab_size_attr.attr, | 4081 | &slab_size_attr.attr, |
3877 | &object_size_attr.attr, | 4082 | &object_size_attr.attr, |
@@ -3902,6 +4107,25 @@ static struct attribute *slab_attrs[] = { | |||
3902 | #ifdef CONFIG_NUMA | 4107 | #ifdef CONFIG_NUMA |
3903 | &remote_node_defrag_ratio_attr.attr, | 4108 | &remote_node_defrag_ratio_attr.attr, |
3904 | #endif | 4109 | #endif |
4110 | #ifdef CONFIG_SLUB_STATS | ||
4111 | &alloc_fastpath_attr.attr, | ||
4112 | &alloc_slowpath_attr.attr, | ||
4113 | &free_fastpath_attr.attr, | ||
4114 | &free_slowpath_attr.attr, | ||
4115 | &free_frozen_attr.attr, | ||
4116 | &free_add_partial_attr.attr, | ||
4117 | &free_remove_partial_attr.attr, | ||
4118 | &alloc_from_partial_attr.attr, | ||
4119 | &alloc_slab_attr.attr, | ||
4120 | &alloc_refill_attr.attr, | ||
4121 | &free_slab_attr.attr, | ||
4122 | &cpuslab_flush_attr.attr, | ||
4123 | &deactivate_full_attr.attr, | ||
4124 | &deactivate_empty_attr.attr, | ||
4125 | &deactivate_to_head_attr.attr, | ||
4126 | &deactivate_to_tail_attr.attr, | ||
4127 | &deactivate_remote_frees_attr.attr, | ||
4128 | #endif | ||
3905 | NULL | 4129 | NULL |
3906 | }; | 4130 | }; |
3907 | 4131 | ||