author		Linus Torvalds <torvalds@linux-foundation.org>	2011-07-30 14:21:48 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-30 14:21:48 -0400
commit		c11abbbaa3252875c5740a6880b9a1a6f1e2a870
tree		692143f7edd1157ef499bff21143e0d6df7cace5
parent		1d3fe4a75b691285cded47c9f1a91b30d25287b0
parent		9e577e8b46ab0c38970c0f0cd7eae62e6dffddee
Merge branch 'slub/lockless' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'slub/lockless' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6: (21 commits)
slub: When allocating a new slab also prep the first object
slub: disable interrupts in cmpxchg_double_slab when falling back to pagelock
Avoid duplicate _count variables in page_struct
Revert "SLUB: Fix build breakage in linux/mm_types.h"
SLUB: Fix build breakage in linux/mm_types.h
slub: slabinfo update for cmpxchg handling
slub: Not necessary to check for empty slab on load_freelist
slub: fast release on full slab
slub: Add statistics for the case that the current slab does not match the node
slub: Get rid of the another_slab label
slub: Avoid disabling interrupts in free slowpath
slub: Disable interrupts in free_debug processing
slub: Invert locking and avoid slab lock
slub: Rework allocator fastpaths
slub: Pass kmem_cache struct to lock and freeze slab
slub: explicit list_lock taking
slub: Add cmpxchg_double_slab()
mm: Rearrange struct page
slub: Move page->frozen handling near where the page->freelist handling occurs
slub: Do not use frozen page flag but a bit in the page counters
...
-rw-r--r--	include/linux/mm_types.h	|  89
-rw-r--r--	include/linux/page-flags.h	|   5
-rw-r--r--	include/linux/slub_def.h	|   3
-rw-r--r--	mm/slub.c			| 764
-rw-r--r--	tools/slub/slabinfo.c		|  59
5 files changed, 616 insertions(+), 304 deletions(-)
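The common thread in the series below is replacing the per-slab bit spinlock on the hot paths with one atomic double-word compare-and-exchange over the adjacent page->freelist and page->counters words. As a rough user-space illustration of that primitive only (this is not the kernel's cmpxchg_double(); the struct and function names here are invented for the sketch), a two-word compare-and-swap can be expressed with the GCC/Clang __atomic builtins:

    /* Illustrative stand-in for cmpxchg_double(): both words are compared
     * and swapped as a single atomic unit. Assumes a compiler with the
     * __atomic builtins; 16-byte operations may need -mcx16 and/or -latomic.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct slab_words {
            void *freelist;              /* first free object */
            unsigned long counters;      /* inuse/objects/frozen packed together */
    } __attribute__((aligned(2 * sizeof(unsigned long))));

    static bool cmpxchg_double_words(struct slab_words *s,
                                     void *old_free, unsigned long old_cnt,
                                     void *new_free, unsigned long new_cnt)
    {
            struct slab_words expected = { old_free, old_cnt };
            struct slab_words desired  = { new_free, new_cnt };

            return __atomic_compare_exchange(s, &expected, &desired, false,
                                             __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }

    int main(void)
    {
            struct slab_words w = { (void *)0x1000, 3 };

            if (cmpxchg_double_words(&w, (void *)0x1000, 3, NULL, 4))
                    printf("freelist taken, counters now %lu\n", w.counters);
            return 0;
    }

Depending on compiler and target this either inlines to an instruction such as cmpxchg16b or goes through libatomic; the patch below likewise keeps a slab_lock() fallback for configurations without a usable cmpxchg_double.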
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 027935c86c68..774b8952deb4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -30,23 +30,61 @@ struct address_space;
  * moment. Note that we have no way to track which tasks are using
  * a page, though if it is a pagecache page, rmap structures can tell us
  * who is mapping it.
+ *
+ * The objects in struct page are organized in double word blocks in
+ * order to allows us to use atomic double word operations on portions
+ * of struct page. That is currently only used by slub but the arrangement
+ * allows the use of atomic double word operations on the flags/mapping
+ * and lru list pointers also.
  */
 struct page {
+	/* First double word block */
 	unsigned long flags;		/* Atomic flags, some possibly
 					 * updated asynchronously */
-	atomic_t _count;		/* Usage count, see below. */
-	union {
-		atomic_t _mapcount;	/* Count of ptes mapped in mms,
-					 * to show when page is mapped
-					 * & limit reverse map searches.
+	struct address_space *mapping;	/* If low bit clear, points to
+					 * inode address_space, or NULL.
+					 * If page mapped as anonymous
+					 * memory, low bit is set, and
+					 * it points to anon_vma object:
+					 * see PAGE_MAPPING_ANON below.
 					 */
-		struct {		/* SLUB */
-			u16 inuse;
-			u16 objects;
+	/* Second double word */
+	struct {
+		union {
+			pgoff_t index;		/* Our offset within mapping. */
+			void *freelist;		/* slub first free object */
+		};
+
+		union {
+			/* Used for cmpxchg_double in slub */
+			unsigned long counters;
+
+			struct {
+
+				union {
+					atomic_t _mapcount;	/* Count of ptes mapped in mms,
+							 * to show when page is mapped
+							 * & limit reverse map searches.
+							 */
+
+					struct {
+						unsigned inuse:16;
+						unsigned objects:15;
+						unsigned frozen:1;
+					};
+				};
+				atomic_t _count;		/* Usage count, see below. */
+			};
 		};
 	};
+
+	/* Third double word block */
+	struct list_head lru;		/* Pageout list, eg. active_list
+					 * protected by zone->lru_lock !
+					 */
+
+	/* Remainder is not double word aligned */
 	union {
-	    struct {
 		unsigned long private;		/* Mapping-private opaque data:
 						 * usually used for buffer_heads
 						 * if PagePrivate set; used for
@@ -54,27 +92,13 @@ struct page {
 						 * indicates order in the buddy
 						 * system if PG_buddy is set.
 						 */
-		struct address_space *mapping;	/* If low bit clear, points to
-						 * inode address_space, or NULL.
-						 * If page mapped as anonymous
-						 * memory, low bit is set, and
-						 * it points to anon_vma object:
-						 * see PAGE_MAPPING_ANON below.
-						 */
-	    };
 #if USE_SPLIT_PTLOCKS
 		spinlock_t ptl;
 #endif
 		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
 		struct page *first_page;	/* Compound tail pages */
-	};
-	union {
-		pgoff_t index;		/* Our offset within mapping. */
-		void *freelist;		/* SLUB: freelist req. slab lock */
 	};
-	struct list_head lru;		/* Pageout list, eg. active_list
-					 * protected by zone->lru_lock !
-					 */
+
 	/*
 	 * On machines where all RAM is mapped into kernel address space,
 	 * we can simply calculate the virtual address. On machines with
@@ -100,7 +124,16 @@ struct page {
 	 */
 	void *shadow;
 #endif
-};
+}
+/*
+ * If another subsystem starts using the double word pairing for atomic
+ * operations on struct page then it must change the #if to ensure
+ * proper alignment of the page struct.
+ */
+#if defined(CONFIG_SLUB) && defined(CONFIG_CMPXCHG_LOCAL)
+	__attribute__((__aligned__(2*sizeof(unsigned long))))
+#endif
+;
 
 typedef unsigned long __nocast vm_flags_t;
 
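The struct page rearrangement above works because counters shares storage with the inuse/objects/frozen bitfields: reading counters snapshots all three at once, and a successful cmpxchg over the (freelist, counters) pair updates them together with the freelist. A minimal stand-alone model of that aliasing (a toy struct reusing the kernel's field names, not struct page itself; assumes an ABI where long and pointer have the same size, as the kernel does):

    #include <assert.h>
    #include <stdio.h>

    /* Toy model of the second double word of struct page. */
    struct toy_page {
            void *freelist;
            union {
                    unsigned long counters;     /* whole word, used for cmpxchg */
                    struct {
                            unsigned inuse:16;
                            unsigned objects:15;
                            unsigned frozen:1;
                    };
            };
    };

    int main(void)
    {
            struct toy_page p = { 0 };

            p.inuse = 5;
            p.objects = 32;
            p.frozen = 1;

            /* One word captures all three fields; relies on union aliasing
             * just as the kernel does. */
            unsigned long snapshot = p.counters;

            static_assert(sizeof(p) == 2 * sizeof(unsigned long),
                          "freelist + counters must form one double word");

            printf("counters snapshot: %#lx\n", snapshot);
            return 0;
    }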
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3e5a1b189a41..e90a673be67e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -124,9 +124,6 @@ enum pageflags {
 
 	/* SLOB */
 	PG_slob_free = PG_private,
-
-	/* SLUB */
-	PG_slub_frozen = PG_active,
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -212,8 +209,6 @@ PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
 __PAGEFLAG(SlobFree, slob_free)
 
-__PAGEFLAG(SlubFrozen, slub_frozen)
-
 /*
  * Private page markings that may be used by the filesystem that owns the page
  * for its own purposes.
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 4b35c06dfbc5..f58d6413d230 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -24,6 +24,7 @@ enum stat_item {
 	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from partial list */
 	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
 	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
+	ALLOC_NODE_MISMATCH,	/* Switching cpu slab */
 	FREE_SLAB,		/* Slab freed to the page allocator */
 	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
 	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
@@ -31,8 +32,10 @@ enum stat_item {
 	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
 	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
 	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
+	DEACTIVATE_BYPASS,	/* Implicit deactivation */
 	ORDER_FALLBACK,		/* Number of times fallback was necessary */
 	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
+	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
 	NR_SLUB_STAT_ITEMS };
 
 struct kmem_cache_cpu {
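Nearly every slub.c change that follows applies the same retry pattern: snapshot freelist and counters, compute the desired state in a private struct page copy, and loop on __cmpxchg_double_slab()/cmpxchg_double_slab() until no concurrent update intervened. A compressed user-space sketch of that loop (toy types and a made-up FROZEN_BIT, not the kernel source; builds with the same __atomic builtins as the sketch above, possibly with -latomic):

    #include <stdbool.h>
    #include <stdio.h>

    struct pair {
            void *freelist;
            unsigned long counters;
    } __attribute__((aligned(2 * sizeof(unsigned long))));

    #define FROZEN_BIT (1UL << 31)          /* stands in for page->frozen */

    static struct pair slab = { (void *)0x2000, 3 };    /* 3 objects in use */

    /* Take the whole freelist and mark the slab frozen, retrying on races
     * (mirrors the shape of acquire_slab() in the patch below). */
    static void *acquire_freelist(void)
    {
            struct pair old, new;

            do {
                    __atomic_load(&slab, &old, __ATOMIC_RELAXED);
                    new.freelist = NULL;
                    new.counters = old.counters | FROZEN_BIT;
            } while (!__atomic_compare_exchange(&slab, &old, &new, false,
                                                __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));

            return old.freelist;            /* caller now owns the old freelist */
    }

    int main(void)
    {
            printf("got freelist %p\n", acquire_freelist());
            return 0;
    }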
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2,10 +2,11 @@
  * SLUB: A slab allocator that limits cache line use instead of queuing
  * objects in per cpu and per node lists.
  *
- * The allocator synchronizes using per slab locks and only
- * uses a centralized lock to manage a pool of partial slabs.
+ * The allocator synchronizes using per slab locks or atomic operatios
+ * and only uses a centralized lock to manage a pool of partial slabs.
  *
  * (C) 2007 SGI, Christoph Lameter
+ * (C) 2011 Linux Foundation, Christoph Lameter
  */
 
 #include <linux/mm.h>
@@ -33,15 +34,27 @@
 
 /*
  * Lock order:
- *   1. slab_lock(page)
- *   2. slab->list_lock
+ *   1. slub_lock (Global Semaphore)
+ *   2. node->list_lock
+ *   3. slab_lock(page) (Only on some arches and for debugging)
  *
- * The slab_lock protects operations on the object of a particular
- * slab and its metadata in the page struct. If the slab lock
- * has been taken then no allocations nor frees can be performed
- * on the objects in the slab nor can the slab be added or removed
- * from the partial or full lists since this would mean modifying
- * the page_struct of the slab.
+ *   slub_lock
+ *
+ *   The role of the slub_lock is to protect the list of all the slabs
+ *   and to synchronize major metadata changes to slab cache structures.
+ *
+ *   The slab_lock is only used for debugging and on arches that do not
+ *   have the ability to do a cmpxchg_double. It only protects the second
+ *   double word in the page struct. Meaning
+ *	A. page->freelist	-> List of object free in a page
+ *	B. page->counters	-> Counters of objects
+ *	C. page->frozen		-> frozen state
+ *
+ * If a slab is frozen then it is exempt from list management. It is not
+ * on any list. The processor that froze the slab is the one who can
+ * perform list operations on the page. Other processors may put objects
+ * onto the freelist but the processor that froze the slab is the only
+ * one that can retrieve the objects from the page's freelist.
  *
  * The list_lock protects the partial and full list on each node and
  * the partial slab counter. If taken then no new slabs may be added or
@@ -54,20 +67,6 @@
  * slabs, operations can continue without any centralized lock. F.e.
  * allocating a long series of objects that fill up slabs does not require
  * the list lock.
- *
- * The lock order is sometimes inverted when we are trying to get a slab
- * off a list. We take the list_lock and then look for a page on the list
- * to use. While we do that objects in the slabs may be freed. We can
- * only operate on the slab if we have also taken the slab_lock. So we use
- * a slab_trylock() on the slab. If trylock was successful then no frees
- * can occur anymore and we can use the slab for allocations etc. If the
- * slab_trylock() does not succeed then frees are in progress in the slab and
- * we must stay away from it for a while since we may cause a bouncing
- * cacheline if we try to acquire the lock. So go onto the next slab.
- * If all pages are busy then we may allocate a new slab instead of reusing
- * a partial slab. A new slab has no one operating on it and thus there is
- * no danger of cacheline contention.
- *
  * Interrupts are disabled during allocation and deallocation in order to
  * make the slab allocator safe to use in the context of an irq. In addition
  * interrupts are disabled to ensure that the processor does not change
@@ -132,6 +131,9 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST
 
+/* Enable to log cmpxchg failures */
+#undef SLUB_DEBUG_CMPXCHG
+
 /*
  * Mininum number of partial slabs. These will be left on the partial
  * lists even if they are empty. kmem_cache_shrink may reclaim them.
@@ -167,10 +169,11 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
 
 #define OO_SHIFT	16
 #define OO_MASK		((1 << OO_SHIFT) - 1)
-#define MAX_OBJS_PER_PAGE	65535 /* since page.objects is u16 */
+#define MAX_OBJS_PER_PAGE	32767 /* since page.objects is u15 */
 
 /* Internal SLUB flags */
 #define __OBJECT_POISON		0x80000000UL /* Poison object */
+#define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */
 
 static int kmem_size = sizeof(struct kmem_cache);
 
@@ -343,11 +346,99 @@ static inline int oo_objects(struct kmem_cache_order_objects x)
 	return x.x & OO_MASK;
 }
 
+/*
+ * Per slab locking using the pagelock
+ */
+static __always_inline void slab_lock(struct page *page)
+{
+	bit_spin_lock(PG_locked, &page->flags);
+}
+
+static __always_inline void slab_unlock(struct page *page)
+{
+	__bit_spin_unlock(PG_locked, &page->flags);
+}
+
+/* Interrupts must be disabled (for the fallback code to work right) */
+static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
+		void *freelist_old, unsigned long counters_old,
+		void *freelist_new, unsigned long counters_new,
+		const char *n)
+{
+	VM_BUG_ON(!irqs_disabled());
+#ifdef CONFIG_CMPXCHG_DOUBLE
+	if (s->flags & __CMPXCHG_DOUBLE) {
+		if (cmpxchg_double(&page->freelist,
+			freelist_old, counters_old,
+			freelist_new, counters_new))
+		return 1;
+	} else
+#endif
+	{
+		slab_lock(page);
+		if (page->freelist == freelist_old && page->counters == counters_old) {
+			page->freelist = freelist_new;
+			page->counters = counters_new;
+			slab_unlock(page);
+			return 1;
+		}
+		slab_unlock(page);
+	}
+
+	cpu_relax();
+	stat(s, CMPXCHG_DOUBLE_FAIL);
+
+#ifdef SLUB_DEBUG_CMPXCHG
+	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
+#endif
+
+	return 0;
+}
+
+static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
+		void *freelist_old, unsigned long counters_old,
+		void *freelist_new, unsigned long counters_new,
+		const char *n)
+{
+#ifdef CONFIG_CMPXCHG_DOUBLE
+	if (s->flags & __CMPXCHG_DOUBLE) {
+		if (cmpxchg_double(&page->freelist,
+			freelist_old, counters_old,
+			freelist_new, counters_new))
+		return 1;
+	} else
+#endif
+	{
+		unsigned long flags;
+
+		local_irq_save(flags);
+		slab_lock(page);
+		if (page->freelist == freelist_old && page->counters == counters_old) {
+			page->freelist = freelist_new;
+			page->counters = counters_new;
+			slab_unlock(page);
+			local_irq_restore(flags);
+			return 1;
+		}
+		slab_unlock(page);
+		local_irq_restore(flags);
+	}
+
+	cpu_relax();
+	stat(s, CMPXCHG_DOUBLE_FAIL);
+
+#ifdef SLUB_DEBUG_CMPXCHG
+	printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
+#endif
+
+	return 0;
+}
+
 #ifdef CONFIG_SLUB_DEBUG
 /*
  * Determine a map of object in use on a page.
  *
- * Slab lock or node listlock must be held to guarantee that the page does
+ * Node listlock must be held to guarantee that the page does
  * not vanish from under us.
  */
 static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
@@ -838,10 +929,11 @@ static int check_slab(struct kmem_cache *s, struct page *page)
 static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 {
 	int nr = 0;
-	void *fp = page->freelist;
+	void *fp;
 	void *object = NULL;
 	unsigned long max_objects;
 
+	fp = page->freelist;
 	while (fp && nr <= page->objects) {
 		if (fp == search)
 			return 1;
@@ -946,26 +1038,27 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
 
 /*
  * Tracking of fully allocated slabs for debugging purposes.
+ *
+ * list_lock must be held.
  */
-static void add_full(struct kmem_cache_node *n, struct page *page)
+static void add_full(struct kmem_cache *s,
+	struct kmem_cache_node *n, struct page *page)
 {
-	spin_lock(&n->list_lock);
+	if (!(s->flags & SLAB_STORE_USER))
+		return;
+
 	list_add(&page->lru, &n->full);
-	spin_unlock(&n->list_lock);
 }
 
+/*
+ * list_lock must be held.
+ */
 static void remove_full(struct kmem_cache *s, struct page *page)
 {
-	struct kmem_cache_node *n;
-
 	if (!(s->flags & SLAB_STORE_USER))
 		return;
 
-	n = get_node(s, page_to_nid(page));
-
-	spin_lock(&n->list_lock);
 	list_del(&page->lru);
-	spin_unlock(&n->list_lock);
 }
 
 /* Tracking of the number of slabs for debugging purposes */
@@ -1021,11 +1114,6 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *pa
 	if (!check_slab(s, page))
 		goto bad;
 
-	if (!on_freelist(s, page, object)) {
-		object_err(s, page, object, "Object already allocated");
-		goto bad;
-	}
-
 	if (!check_valid_pointer(s, page, object)) {
 		object_err(s, page, object, "Freelist Pointer check fails");
 		goto bad;
@@ -1058,6 +1146,12 @@ bad:
 static noinline int free_debug_processing(struct kmem_cache *s,
 		 struct page *page, void *object, unsigned long addr)
 {
+	unsigned long flags;
+	int rc = 0;
+
+	local_irq_save(flags);
+	slab_lock(page);
+
 	if (!check_slab(s, page))
 		goto fail;
 
@@ -1072,7 +1166,7 @@ static noinline int free_debug_processing(struct kmem_cache *s,
 	}
 
 	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
-		return 0;
+		goto out;
 
 	if (unlikely(s != page->slab)) {
 		if (!PageSlab(page)) {
@@ -1089,18 +1183,19 @@ static noinline int free_debug_processing(struct kmem_cache *s,
 		goto fail;
 	}
 
-	/* Special debug activities for freeing objects */
-	if (!PageSlubFrozen(page) && !page->freelist)
-		remove_full(s, page);
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
 	trace(s, page, object, 0);
 	init_object(s, object, SLUB_RED_INACTIVE);
-	return 1;
+	rc = 1;
+out:
+	slab_unlock(page);
+	local_irq_restore(flags);
+	return rc;
 
 fail:
 	slab_fix(s, "Object at 0x%p not freed", object);
-	return 0;
+	goto out;
 }
 
 static int __init setup_slub_debug(char *str)
@@ -1200,7 +1295,9 @@ static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
 			{ return 1; }
 static inline int check_object(struct kmem_cache *s, struct page *page,
 			void *object, u8 val) { return 1; }
-static inline void add_full(struct kmem_cache_node *n, struct page *page) {}
+static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
+					struct page *page) {}
+static inline void remove_full(struct kmem_cache *s, struct page *page) {}
 static inline unsigned long kmem_cache_flags(unsigned long objsize,
 	unsigned long flags, const char *name,
 	void (*ctor)(void *))
@@ -1252,6 +1349,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	struct kmem_cache_order_objects oo = s->oo;
 	gfp_t alloc_gfp;
 
+	flags &= gfp_allowed_mask;
+
+	if (flags & __GFP_WAIT)
+		local_irq_enable();
+
 	flags |= s->allocflags;
 
 	/*
@@ -1268,12 +1370,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		 * Try a lower order alloc if possible
 		 */
 		page = alloc_slab_page(flags, node, oo);
-		if (!page)
-			return NULL;
 
-		stat(s, ORDER_FALLBACK);
+		if (page)
+			stat(s, ORDER_FALLBACK);
 	}
 
+	if (flags & __GFP_WAIT)
+		local_irq_disable();
+
+	if (!page)
+		return NULL;
+
 	if (kmemcheck_enabled
 		&& !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
 		int pages = 1 << oo_order(oo);
@@ -1341,6 +1448,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	page->freelist = start;
 	page->inuse = 0;
+	page->frozen = 1;
 out:
 	return page;
 }
@@ -1418,77 +1526,87 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
 }
 
 /*
- * Per slab locking using the pagelock
- */
-static __always_inline void slab_lock(struct page *page)
-{
-	bit_spin_lock(PG_locked, &page->flags);
-}
-
-static __always_inline void slab_unlock(struct page *page)
-{
-	__bit_spin_unlock(PG_locked, &page->flags);
-}
-
-static __always_inline int slab_trylock(struct page *page)
-{
-	int rc = 1;
-
-	rc = bit_spin_trylock(PG_locked, &page->flags);
-	return rc;
-}
-
-/*
- * Management of partially allocated slabs
+ * Management of partially allocated slabs.
+ *
+ * list_lock must be held.
  */
-static void add_partial(struct kmem_cache_node *n,
+static inline void add_partial(struct kmem_cache_node *n,
 				struct page *page, int tail)
 {
-	spin_lock(&n->list_lock);
 	n->nr_partial++;
 	if (tail)
 		list_add_tail(&page->lru, &n->partial);
 	else
 		list_add(&page->lru, &n->partial);
-	spin_unlock(&n->list_lock);
 }
 
-static inline void __remove_partial(struct kmem_cache_node *n,
+/*
+ * list_lock must be held.
+ */
+static inline void remove_partial(struct kmem_cache_node *n,
 					struct page *page)
 {
 	list_del(&page->lru);
 	n->nr_partial--;
 }
 
-static void remove_partial(struct kmem_cache *s, struct page *page)
-{
-	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
-	spin_lock(&n->list_lock);
-	__remove_partial(n, page);
-	spin_unlock(&n->list_lock);
-}
-
 /*
- * Lock slab and remove from the partial list.
+ * Lock slab, remove from the partial list and put the object into the
+ * per cpu freelist.
  *
  * Must hold list_lock.
  */
-static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
-		struct page *page)
+static inline int acquire_slab(struct kmem_cache *s,
+		struct kmem_cache_node *n, struct page *page)
 {
-	if (slab_trylock(page)) {
-		__remove_partial(n, page);
-		__SetPageSlubFrozen(page);
+	void *freelist;
+	unsigned long counters;
+	struct page new;
+
+	/*
+	 * Zap the freelist and set the frozen bit.
+	 * The old freelist is the list of objects for the
+	 * per cpu allocation list.
+	 */
+	do {
+		freelist = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		new.inuse = page->objects;
+
+		VM_BUG_ON(new.frozen);
+		new.frozen = 1;
+
+	} while (!__cmpxchg_double_slab(s, page,
+			freelist, counters,
+			NULL, new.counters,
+			"lock and freeze"));
+
+	remove_partial(n, page);
+
+	if (freelist) {
+		/* Populate the per cpu freelist */
+		this_cpu_write(s->cpu_slab->freelist, freelist);
+		this_cpu_write(s->cpu_slab->page, page);
+		this_cpu_write(s->cpu_slab->node, page_to_nid(page));
 		return 1;
+	} else {
+		/*
+		 * Slab page came from the wrong list. No object to allocate
+		 * from. Put it onto the correct list and continue partial
+		 * scan.
+		 */
+		printk(KERN_ERR "SLUB: %s : Page without available objects on"
+			" partial list\n", s->name);
+		return 0;
 	}
-	return 0;
 }
 
 /*
  * Try to allocate a partial slab from a specific node.
  */
-static struct page *get_partial_node(struct kmem_cache_node *n)
+static struct page *get_partial_node(struct kmem_cache *s,
+					struct kmem_cache_node *n)
 {
 	struct page *page;
 
@@ -1503,7 +1621,7 @@ static struct page *get_partial_node(struct kmem_cache_node *n)
 
 	spin_lock(&n->list_lock);
 	list_for_each_entry(page, &n->partial, lru)
-		if (lock_and_freeze_slab(n, page))
+		if (acquire_slab(s, n, page))
 			goto out;
 	page = NULL;
 out:
@@ -1554,7 +1672,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 
 		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
 				n->nr_partial > s->min_partial) {
-			page = get_partial_node(n);
+			page = get_partial_node(s, n);
 			if (page) {
 				put_mems_allowed();
 				return page;
@@ -1574,60 +1692,13 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 	struct page *page;
 	int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
 
-	page = get_partial_node(get_node(s, searchnode));
+	page = get_partial_node(s, get_node(s, searchnode));
 	if (page || node != NUMA_NO_NODE)
 		return page;
 
 	return get_any_partial(s, flags);
 }
 
-/*
- * Move a page back to the lists.
- *
- * Must be called with the slab lock held.
- *
- * On exit the slab lock will have been dropped.
- */
-static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
-	__releases(bitlock)
-{
-	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-
-	__ClearPageSlubFrozen(page);
-	if (page->inuse) {
-
-		if (page->freelist) {
-			add_partial(n, page, tail);
-			stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
-		} else {
-			stat(s, DEACTIVATE_FULL);
-			if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER))
-				add_full(n, page);
-		}
-		slab_unlock(page);
-	} else {
-		stat(s, DEACTIVATE_EMPTY);
-		if (n->nr_partial < s->min_partial) {
-			/*
-			 * Adding an empty slab to the partial slabs in order
-			 * to avoid page allocator overhead. This slab needs
-			 * to come after the other slabs with objects in
-			 * so that the others get filled first. That way the
-			 * size of the partial list stays small.
-			 *
-			 * kmem_cache_shrink can reclaim any empty slabs from
-			 * the partial list.
-			 */
-			add_partial(n, page, 1);
-			slab_unlock(page);
-		} else {
-			slab_unlock(page);
-			stat(s, FREE_SLAB);
-			discard_slab(s, page);
-		}
-	}
-}
-
 #ifdef CONFIG_PREEMPT
 /*
  * Calculate the next globally unique transaction for disambiguiation
@@ -1697,42 +1768,161 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
 /*
  * Remove the cpu slab
  */
+
+/*
+ * Remove the cpu slab
+ */
 static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
-	__releases(bitlock)
 {
+	enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
 	struct page *page = c->page;
-	int tail = 1;
-
-	if (page->freelist)
+	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	int lock = 0;
+	enum slab_modes l = M_NONE, m = M_NONE;
+	void *freelist;
+	void *nextfree;
+	int tail = 0;
+	struct page new;
+	struct page old;
+
+	if (page->freelist) {
 		stat(s, DEACTIVATE_REMOTE_FREES);
+		tail = 1;
+	}
+
+	c->tid = next_tid(c->tid);
+	c->page = NULL;
+	freelist = c->freelist;
+	c->freelist = NULL;
+
+	/*
+	 * Stage one: Free all available per cpu objects back
+	 * to the page freelist while it is still frozen. Leave the
+	 * last one.
+	 *
+	 * There is no need to take the list->lock because the page
+	 * is still frozen.
+	 */
+	while (freelist && (nextfree = get_freepointer(s, freelist))) {
+		void *prior;
+		unsigned long counters;
+
+		do {
+			prior = page->freelist;
+			counters = page->counters;
+			set_freepointer(s, freelist, prior);
+			new.counters = counters;
+			new.inuse--;
+			VM_BUG_ON(!new.frozen);
+
+		} while (!__cmpxchg_double_slab(s, page,
+			prior, counters,
+			freelist, new.counters,
+			"drain percpu freelist"));
+
+		freelist = nextfree;
+	}
+
 	/*
-	 * Merge cpu freelist into slab freelist. Typically we get here
-	 * because both freelists are empty. So this is unlikely
-	 * to occur.
+	 * Stage two: Ensure that the page is unfrozen while the
+	 * list presence reflects the actual number of objects
+	 * during unfreeze.
+	 *
+	 * We setup the list membership and then perform a cmpxchg
+	 * with the count. If there is a mismatch then the page
+	 * is not unfrozen but the page is on the wrong list.
+	 *
+	 * Then we restart the process which may have to remove
+	 * the page from the list that we just put it on again
+	 * because the number of objects in the slab may have
+	 * changed.
 	 */
-	while (unlikely(c->freelist)) {
-		void **object;
+redo:
 
-		tail = 0;	/* Hot objects. Put the slab first */
+	old.freelist = page->freelist;
+	old.counters = page->counters;
+	VM_BUG_ON(!old.frozen);
 
-		/* Retrieve object from cpu_freelist */
-		object = c->freelist;
-		c->freelist = get_freepointer(s, c->freelist);
+	/* Determine target state of the slab */
+	new.counters = old.counters;
+	if (freelist) {
+		new.inuse--;
+		set_freepointer(s, freelist, old.freelist);
+		new.freelist = freelist;
+	} else
+		new.freelist = old.freelist;
+
+	new.frozen = 0;
+
+	if (!new.inuse && n->nr_partial < s->min_partial)
+		m = M_FREE;
+	else if (new.freelist) {
+		m = M_PARTIAL;
+		if (!lock) {
+			lock = 1;
+			/*
+			 * Taking the spinlock removes the possiblity
+			 * that acquire_slab() will see a slab page that
+			 * is frozen
+			 */
+			spin_lock(&n->list_lock);
+		}
+	} else {
+		m = M_FULL;
+		if (kmem_cache_debug(s) && !lock) {
+			lock = 1;
+			/*
+			 * This also ensures that the scanning of full
+			 * slabs from diagnostic functions will not see
+			 * any frozen slabs.
+			 */
+			spin_lock(&n->list_lock);
+		}
+	}
+
+	if (l != m) {
+
+		if (l == M_PARTIAL)
+
+			remove_partial(n, page);
+
+		else if (l == M_FULL)
+
+			remove_full(s, page);
+
+		if (m == M_PARTIAL) {
+
+			add_partial(n, page, tail);
+			stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+
+		} else if (m == M_FULL) {
 
-		/* And put onto the regular freelist */
-		set_freepointer(s, object, page->freelist);
-		page->freelist = object;
-		page->inuse--;
+			stat(s, DEACTIVATE_FULL);
+			add_full(s, n, page);
+
+		}
+	}
+
+	l = m;
+	if (!__cmpxchg_double_slab(s, page,
+				old.freelist, old.counters,
+				new.freelist, new.counters,
+				"unfreezing slab"))
+		goto redo;
+
+	if (lock)
+		spin_unlock(&n->list_lock);
+
+	if (m == M_FREE) {
+		stat(s, DEACTIVATE_EMPTY);
+		discard_slab(s, page);
+		stat(s, FREE_SLAB);
 	}
-	c->page = NULL;
-	c->tid = next_tid(c->tid);
-	unfreeze_slab(s, page, tail);
 }
 
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
 	stat(s, CPUSLAB_FLUSH);
-	slab_lock(c->page);
 	deactivate_slab(s, c);
 }
 
@@ -1861,6 +2051,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	void **object;
 	struct page *page;
 	unsigned long flags;
+	struct page new;
+	unsigned long counters;
 
 	local_irq_save(flags);
 #ifdef CONFIG_PREEMPT
@@ -1879,72 +2071,97 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	if (!page)
 		goto new_slab;
 
-	slab_lock(page);
-	if (unlikely(!node_match(c, node)))
-		goto another_slab;
+	if (unlikely(!node_match(c, node))) {
+		stat(s, ALLOC_NODE_MISMATCH);
+		deactivate_slab(s, c);
+		goto new_slab;
+	}
+
+	stat(s, ALLOC_SLOWPATH);
+
+	do {
+		object = page->freelist;
+		counters = page->counters;
+		new.counters = counters;
+		VM_BUG_ON(!new.frozen);
+
+		/*
+		 * If there is no object left then we use this loop to
+		 * deactivate the slab which is simple since no objects
+		 * are left in the slab and therefore we do not need to
+		 * put the page back onto the partial list.
+		 *
+		 * If there are objects left then we retrieve them
+		 * and use them to refill the per cpu queue.
+		 */
+
+		new.inuse = page->objects;
+		new.frozen = object != NULL;
+
+	} while (!__cmpxchg_double_slab(s, page,
+			object, counters,
+			NULL, new.counters,
+			"__slab_alloc"));
+
+	if (unlikely(!object)) {
+		c->page = NULL;
+		stat(s, DEACTIVATE_BYPASS);
+		goto new_slab;
+	}
 
 	stat(s, ALLOC_REFILL);
 
 load_freelist:
-	object = page->freelist;
-	if (unlikely(!object))
-		goto another_slab;
-	if (kmem_cache_debug(s))
-		goto debug;
-
+	VM_BUG_ON(!page->frozen);
 	c->freelist = get_freepointer(s, object);
-	page->inuse = page->objects;
-	page->freelist = NULL;
-
-	slab_unlock(page);
 	c->tid = next_tid(c->tid);
 	local_irq_restore(flags);
-	stat(s, ALLOC_SLOWPATH);
 	return object;
 
-another_slab:
-	deactivate_slab(s, c);
-
 new_slab:
 	page = get_partial(s, gfpflags, node);
 	if (page) {
 		stat(s, ALLOC_FROM_PARTIAL);
-		c->node = page_to_nid(page);
-		c->page = page;
+		object = c->freelist;
+
+		if (kmem_cache_debug(s))
+			goto debug;
 		goto load_freelist;
 	}
 
-	gfpflags &= gfp_allowed_mask;
-	if (gfpflags & __GFP_WAIT)
-		local_irq_enable();
-
 	page = new_slab(s, gfpflags, node);
 
-	if (gfpflags & __GFP_WAIT)
-		local_irq_disable();
-
 	if (page) {
 		c = __this_cpu_ptr(s->cpu_slab);
-		stat(s, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
 
-		slab_lock(page);
-		__SetPageSlubFrozen(page);
+		/*
+		 * No other reference to the page yet so we can
+		 * muck around with it freely without cmpxchg
+		 */
+		object = page->freelist;
+		page->freelist = NULL;
+		page->inuse = page->objects;
+
+		stat(s, ALLOC_SLAB);
 		c->node = page_to_nid(page);
 		c->page = page;
+
+		if (kmem_cache_debug(s))
+			goto debug;
 		goto load_freelist;
 	}
 	if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
 		slab_out_of_memory(s, gfpflags, node);
 	local_irq_restore(flags);
 	return NULL;
+
 debug:
-	if (!alloc_debug_processing(s, page, object, addr))
-		goto another_slab;
+	if (!object || !alloc_debug_processing(s, page, object, addr))
+		goto new_slab;
 
-	page->inuse++;
-	page->freelist = get_freepointer(s, object);
+	c->freelist = get_freepointer(s, object);
 	deactivate_slab(s, c);
 	c->page = NULL;
 	c->node = NUMA_NO_NODE;
@@ -2096,40 +2313,75 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
 	void *prior;
 	void **object = (void *)x;
-	unsigned long flags;
+	int was_frozen;
+	int inuse;
+	struct page new;
+	unsigned long counters;
+	struct kmem_cache_node *n = NULL;
+	unsigned long uninitialized_var(flags);
 
-	local_irq_save(flags);
-	slab_lock(page);
 	stat(s, FREE_SLOWPATH);
 
 	if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
-		goto out_unlock;
+		return;
 
-	prior = page->freelist;
-	set_freepointer(s, object, prior);
-	page->freelist = object;
-	page->inuse--;
+	do {
+		prior = page->freelist;
+		counters = page->counters;
+		set_freepointer(s, object, prior);
+		new.counters = counters;
+		was_frozen = new.frozen;
+		new.inuse--;
+		if ((!new.inuse || !prior) && !was_frozen && !n) {
+			n = get_node(s, page_to_nid(page));
+			/*
+			 * Speculatively acquire the list_lock.
+			 * If the cmpxchg does not succeed then we may
+			 * drop the list_lock without any processing.
+			 *
+			 * Otherwise the list_lock will synchronize with
+			 * other processors updating the list of slabs.
+			 */
+			spin_lock_irqsave(&n->list_lock, flags);
+		}
+		inuse = new.inuse;
 
-	if (unlikely(PageSlubFrozen(page))) {
-		stat(s, FREE_FROZEN);
-		goto out_unlock;
-	}
+	} while (!cmpxchg_double_slab(s, page,
+		prior, counters,
+		object, new.counters,
+		"__slab_free"));
 
-	if (unlikely(!page->inuse))
-		goto slab_empty;
+	if (likely(!n)) {
+		/*
+		 * The list lock was not taken therefore no list
+		 * activity can be necessary.
+		 */
+		if (was_frozen)
+			stat(s, FREE_FROZEN);
+		return;
+	}
 
 	/*
-	 * Objects left in the slab. If it was not on the partial list before
-	 * then add it.
+	 * was_frozen may have been set after we acquired the list_lock in
+	 * an earlier loop. So we need to check it here again.
 	 */
-	if (unlikely(!prior)) {
-		add_partial(get_node(s, page_to_nid(page)), page, 1);
-		stat(s, FREE_ADD_PARTIAL);
-	}
+	if (was_frozen)
+		stat(s, FREE_FROZEN);
+	else {
+		if (unlikely(!inuse && n->nr_partial > s->min_partial))
+			goto slab_empty;
 
-out_unlock:
-	slab_unlock(page);
-	local_irq_restore(flags);
+		/*
+		 * Objects left in the slab. If it was not on the partial list before
+		 * then add it.
+		 */
+		if (unlikely(!prior)) {
+			remove_full(s, page);
+			add_partial(n, page, 0);
+			stat(s, FREE_ADD_PARTIAL);
+		}
+	}
+	spin_unlock_irqrestore(&n->list_lock, flags);
 	return;
 
 slab_empty:
@@ -2137,11 +2389,11 @@ slab_empty:
 	/*
 	 * Slab still on the partial list.
 	 */
-	remove_partial(s, page);
+	remove_partial(n, page);
 	stat(s, FREE_REMOVE_PARTIAL);
 	}
-	slab_unlock(page);
-	local_irq_restore(flags);
+
+	spin_unlock_irqrestore(&n->list_lock, flags);
 	stat(s, FREE_SLAB);
 	discard_slab(s, page);
 }
@@ -2415,7 +2667,6 @@ static void early_kmem_cache_node_alloc(int node)
 {
 	struct page *page;
 	struct kmem_cache_node *n;
-	unsigned long flags;
 
 	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
 
@@ -2433,6 +2684,7 @@ static void early_kmem_cache_node_alloc(int node)
 	BUG_ON(!n);
 	page->freelist = get_freepointer(kmem_cache_node, n);
 	page->inuse++;
+	page->frozen = 0;
 	kmem_cache_node->node[node] = n;
 #ifdef CONFIG_SLUB_DEBUG
 	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
@@ -2441,14 +2693,7 @@ static void early_kmem_cache_node_alloc(int node)
 	init_kmem_cache_node(n, kmem_cache_node);
 	inc_slabs_node(kmem_cache_node, node, page->objects);
 
-	/*
-	 * lockdep requires consistent irq usage for each lock
-	 * so even though there cannot be a race this early in
-	 * the boot sequence, we still disable irqs.
-	 */
-	local_irq_save(flags);
 	add_partial(n, page, 0);
-	local_irq_restore(flags);
 }
 
 static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -2654,6 +2899,12 @@ static int kmem_cache_open(struct kmem_cache *s,
 		}
 	}
 
+#ifdef CONFIG_CMPXCHG_DOUBLE
+	if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
+		/* Enable fast mode */
+		s->flags |= __CMPXCHG_DOUBLE;
+#endif
+
 	/*
 	 * The larger the object size is, the more pages we want on the partial
 	 * list to avoid pounding the page allocator excessively.
@@ -2726,7 +2977,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 	spin_lock_irqsave(&n->list_lock, flags);
 	list_for_each_entry_safe(page, h, &n->partial, lru) {
 		if (!page->inuse) {
-			__remove_partial(n, page);
+			remove_partial(n, page);
 			discard_slab(s, page);
 		} else {
 			list_slab_objects(s, page,
@@ -3094,14 +3345,8 @@ int kmem_cache_shrink(struct kmem_cache *s) | |||
3094 | * list_lock. page->inuse here is the upper limit. | 3345 | * list_lock. page->inuse here is the upper limit. |
3095 | */ | 3346 | */ |
3096 | list_for_each_entry_safe(page, t, &n->partial, lru) { | 3347 | list_for_each_entry_safe(page, t, &n->partial, lru) { |
3097 | if (!page->inuse && slab_trylock(page)) { | 3348 | if (!page->inuse) { |
3098 | /* | 3349 | remove_partial(n, page); |
3099 | * Must hold slab lock here because slab_free | ||
3100 | * may have freed the last object and be | ||
3101 | * waiting to release the slab. | ||
3102 | */ | ||
3103 | __remove_partial(n, page); | ||
3104 | slab_unlock(page); | ||
3105 | discard_slab(s, page); | 3350 | discard_slab(s, page); |
3106 | } else { | 3351 | } else { |
3107 | list_move(&page->lru, | 3352 | list_move(&page->lru, |
@@ -3689,12 +3934,9 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
3689 | static void validate_slab_slab(struct kmem_cache *s, struct page *page, | 3934 | static void validate_slab_slab(struct kmem_cache *s, struct page *page, |
3690 | unsigned long *map) | 3935 | unsigned long *map) |
3691 | { | 3936 | { |
3692 | if (slab_trylock(page)) { | 3937 | slab_lock(page); |
3693 | validate_slab(s, page, map); | 3938 | validate_slab(s, page, map); |
3694 | slab_unlock(page); | 3939 | slab_unlock(page); |
3695 | } else | ||
3696 | printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n", | ||
3697 | s->name, page); | ||
3698 | } | 3940 | } |
3699 | 3941 | ||
3700 | static int validate_slab_node(struct kmem_cache *s, | 3942 | static int validate_slab_node(struct kmem_cache *s, |
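validate_slab_slab() similarly stops skipping busy slabs with a "Skipped busy slab" message; it now just takes the slab lock and waits. That looks safe because the reworked fastpaths no longer hold the per-slab lock across long sections, so contention on it should be brief. The lock itself is assumed to still be the per-page bit spinlock:

    static __always_inline void slab_lock(struct page *page)
    {
            bit_spin_lock(PG_locked, &page->flags);
    }

    static __always_inline void slab_unlock(struct page *page)
    {
            __bit_spin_unlock(PG_locked, &page->flags);
    }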
@@ -4342,8 +4584,10 @@ static ssize_t sanity_checks_store(struct kmem_cache *s, | |||
4342 | const char *buf, size_t length) | 4584 | const char *buf, size_t length) |
4343 | { | 4585 | { |
4344 | s->flags &= ~SLAB_DEBUG_FREE; | 4586 | s->flags &= ~SLAB_DEBUG_FREE; |
4345 | if (buf[0] == '1') | 4587 | if (buf[0] == '1') { |
4588 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
4346 | s->flags |= SLAB_DEBUG_FREE; | 4589 | s->flags |= SLAB_DEBUG_FREE; |
4590 | } | ||
4347 | return length; | 4591 | return length; |
4348 | } | 4592 | } |
4349 | SLAB_ATTR(sanity_checks); | 4593 | SLAB_ATTR(sanity_checks); |
@@ -4357,8 +4601,10 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf, | |||
4357 | size_t length) | 4601 | size_t length) |
4358 | { | 4602 | { |
4359 | s->flags &= ~SLAB_TRACE; | 4603 | s->flags &= ~SLAB_TRACE; |
4360 | if (buf[0] == '1') | 4604 | if (buf[0] == '1') { |
4605 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
4361 | s->flags |= SLAB_TRACE; | 4606 | s->flags |= SLAB_TRACE; |
4607 | } | ||
4362 | return length; | 4608 | return length; |
4363 | } | 4609 | } |
4364 | SLAB_ATTR(trace); | 4610 | SLAB_ATTR(trace); |
@@ -4375,8 +4621,10 @@ static ssize_t red_zone_store(struct kmem_cache *s, | |||
4375 | return -EBUSY; | 4621 | return -EBUSY; |
4376 | 4622 | ||
4377 | s->flags &= ~SLAB_RED_ZONE; | 4623 | s->flags &= ~SLAB_RED_ZONE; |
4378 | if (buf[0] == '1') | 4624 | if (buf[0] == '1') { |
4625 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
4379 | s->flags |= SLAB_RED_ZONE; | 4626 | s->flags |= SLAB_RED_ZONE; |
4627 | } | ||
4380 | calculate_sizes(s, -1); | 4628 | calculate_sizes(s, -1); |
4381 | return length; | 4629 | return length; |
4382 | } | 4630 | } |
@@ -4394,8 +4642,10 @@ static ssize_t poison_store(struct kmem_cache *s, | |||
4394 | return -EBUSY; | 4642 | return -EBUSY; |
4395 | 4643 | ||
4396 | s->flags &= ~SLAB_POISON; | 4644 | s->flags &= ~SLAB_POISON; |
4397 | if (buf[0] == '1') | 4645 | if (buf[0] == '1') { |
4646 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
4398 | s->flags |= SLAB_POISON; | 4647 | s->flags |= SLAB_POISON; |
4648 | } | ||
4399 | calculate_sizes(s, -1); | 4649 | calculate_sizes(s, -1); |
4400 | return length; | 4650 | return length; |
4401 | } | 4651 | } |
@@ -4413,8 +4663,10 @@ static ssize_t store_user_store(struct kmem_cache *s, | |||
4413 | return -EBUSY; | 4663 | return -EBUSY; |
4414 | 4664 | ||
4415 | s->flags &= ~SLAB_STORE_USER; | 4665 | s->flags &= ~SLAB_STORE_USER; |
4416 | if (buf[0] == '1') | 4666 | if (buf[0] == '1') { |
4667 | s->flags &= ~__CMPXCHG_DOUBLE; | ||
4417 | s->flags |= SLAB_STORE_USER; | 4668 | s->flags |= SLAB_STORE_USER; |
4669 | } | ||
4418 | calculate_sizes(s, -1); | 4670 | calculate_sizes(s, -1); |
4419 | return length; | 4671 | return length; |
4420 | } | 4672 | } |
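The five sysfs store handlers above (sanity_checks, trace, red_zone, poison, store_user) all gain the same two lines: switching a debug option on also clears __CMPXCHG_DOUBLE for that cache, because debug processing relies on the locked slow paths and cannot run under the lockless double-word update. A hypothetical helper, not part of the patch, just to make the shared pattern explicit:

    static void slab_debug_flag_store(struct kmem_cache *s,
                                      unsigned long flag, const char *buf)
    {
            s->flags &= ~flag;
            if (buf[0] == '1') {
                    /* Debugging needs the locked slow paths. */
                    s->flags &= ~__CMPXCHG_DOUBLE;
                    s->flags |= flag;
            }
    }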
@@ -4579,6 +4831,7 @@ STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial); | |||
4579 | STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); | 4831 | STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); |
4580 | STAT_ATTR(ALLOC_SLAB, alloc_slab); | 4832 | STAT_ATTR(ALLOC_SLAB, alloc_slab); |
4581 | STAT_ATTR(ALLOC_REFILL, alloc_refill); | 4833 | STAT_ATTR(ALLOC_REFILL, alloc_refill); |
4834 | STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch); | ||
4582 | STAT_ATTR(FREE_SLAB, free_slab); | 4835 | STAT_ATTR(FREE_SLAB, free_slab); |
4583 | STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); | 4836 | STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); |
4584 | STAT_ATTR(DEACTIVATE_FULL, deactivate_full); | 4837 | STAT_ATTR(DEACTIVATE_FULL, deactivate_full); |
@@ -4586,7 +4839,10 @@ STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty); | |||
4586 | STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); | 4839 | STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); |
4587 | STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); | 4840 | STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); |
4588 | STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); | 4841 | STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); |
4842 | STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass); | ||
4589 | STAT_ATTR(ORDER_FALLBACK, order_fallback); | 4843 | STAT_ATTR(ORDER_FALLBACK, order_fallback); |
4844 | STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail); | ||
4845 | STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail); | ||
4590 | #endif | 4846 | #endif |
4591 | 4847 | ||
4592 | static struct attribute *slab_attrs[] = { | 4848 | static struct attribute *slab_attrs[] = { |
@@ -4636,6 +4892,7 @@ static struct attribute *slab_attrs[] = { | |||
4636 | &alloc_from_partial_attr.attr, | 4892 | &alloc_from_partial_attr.attr, |
4637 | &alloc_slab_attr.attr, | 4893 | &alloc_slab_attr.attr, |
4638 | &alloc_refill_attr.attr, | 4894 | &alloc_refill_attr.attr, |
4895 | &alloc_node_mismatch_attr.attr, | ||
4639 | &free_slab_attr.attr, | 4896 | &free_slab_attr.attr, |
4640 | &cpuslab_flush_attr.attr, | 4897 | &cpuslab_flush_attr.attr, |
4641 | &deactivate_full_attr.attr, | 4898 | &deactivate_full_attr.attr, |
@@ -4643,7 +4900,10 @@ static struct attribute *slab_attrs[] = { | |||
4643 | &deactivate_to_head_attr.attr, | 4900 | &deactivate_to_head_attr.attr, |
4644 | &deactivate_to_tail_attr.attr, | 4901 | &deactivate_to_tail_attr.attr, |
4645 | &deactivate_remote_frees_attr.attr, | 4902 | &deactivate_remote_frees_attr.attr, |
4903 | &deactivate_bypass_attr.attr, | ||
4646 | &order_fallback_attr.attr, | 4904 | &order_fallback_attr.attr, |
4905 | &cmpxchg_double_fail_attr.attr, | ||
4906 | &cmpxchg_double_cpu_fail_attr.attr, | ||
4647 | #endif | 4907 | #endif |
4648 | #ifdef CONFIG_FAILSLAB | 4908 | #ifdef CONFIG_FAILSLAB |
4649 | &failslab_attr.attr, | 4909 | &failslab_attr.attr, |
diff --git a/tools/slub/slabinfo.c b/tools/slub/slabinfo.c index 516551c9f172..868cc93f7ac2 100644 --- a/tools/slub/slabinfo.c +++ b/tools/slub/slabinfo.c | |||
@@ -2,8 +2,9 @@ | |||
2 | * Slabinfo: Tool to get reports about slabs | 2 | * Slabinfo: Tool to get reports about slabs |
3 | * | 3 | * |
4 | * (C) 2007 sgi, Christoph Lameter | 4 | * (C) 2007 sgi, Christoph Lameter |
5 | * (C) 2011 Linux Foundation, Christoph Lameter | ||
5 | * | 6 | * |
6 | * Compile by: | 7 | * Compile with: |
7 | * | 8 | * |
8 | * gcc -o slabinfo slabinfo.c | 9 | * gcc -o slabinfo slabinfo.c |
9 | */ | 10 | */ |
@@ -39,6 +40,8 @@ struct slabinfo { | |||
39 | unsigned long cpuslab_flush, deactivate_full, deactivate_empty; | 40 | unsigned long cpuslab_flush, deactivate_full, deactivate_empty; |
40 | unsigned long deactivate_to_head, deactivate_to_tail; | 41 | unsigned long deactivate_to_head, deactivate_to_tail; |
41 | unsigned long deactivate_remote_frees, order_fallback; | 42 | unsigned long deactivate_remote_frees, order_fallback; |
43 | unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail; | ||
44 | unsigned long alloc_node_mismatch, deactivate_bypass; | ||
42 | int numa[MAX_NODES]; | 45 | int numa[MAX_NODES]; |
43 | int numa_partial[MAX_NODES]; | 46 | int numa_partial[MAX_NODES]; |
44 | } slabinfo[MAX_SLABS]; | 47 | } slabinfo[MAX_SLABS]; |
@@ -99,7 +102,7 @@ static void fatal(const char *x, ...) | |||
99 | 102 | ||
100 | static void usage(void) | 103 | static void usage(void) |
101 | { | 104 | { |
102 | printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n" | 105 | printf("slabinfo 4/15/2011. (c) 2007 sgi/(c) 2011 Linux Foundation.\n\n" |
103 | "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" | 106 | "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n" |
104 | "-a|--aliases Show aliases\n" | 107 | "-a|--aliases Show aliases\n" |
105 | "-A|--activity Most active slabs first\n" | 108 | "-A|--activity Most active slabs first\n" |
@@ -293,7 +296,7 @@ int line = 0; | |||
293 | static void first_line(void) | 296 | static void first_line(void) |
294 | { | 297 | { |
295 | if (show_activity) | 298 | if (show_activity) |
296 | printf("Name Objects Alloc Free %%Fast Fallb O\n"); | 299 | printf("Name Objects Alloc Free %%Fast Fallb O CmpX UL\n"); |
297 | else | 300 | else |
298 | printf("Name Objects Objsize Space " | 301 | printf("Name Objects Objsize Space " |
299 | "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); | 302 | "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n"); |
@@ -379,14 +382,14 @@ static void show_tracking(struct slabinfo *s) | |||
379 | printf("\n%s: Kernel object allocation\n", s->name); | 382 | printf("\n%s: Kernel object allocation\n", s->name); |
380 | printf("-----------------------------------------------------------------------\n"); | 383 | printf("-----------------------------------------------------------------------\n"); |
381 | if (read_slab_obj(s, "alloc_calls")) | 384 | if (read_slab_obj(s, "alloc_calls")) |
382 | printf(buffer); | 385 | printf("%s", buffer); |
383 | else | 386 | else |
384 | printf("No Data\n"); | 387 | printf("No Data\n"); |
385 | 388 | ||
386 | printf("\n%s: Kernel object freeing\n", s->name); | 389 | printf("\n%s: Kernel object freeing\n", s->name); |
387 | printf("------------------------------------------------------------------------\n"); | 390 | printf("------------------------------------------------------------------------\n"); |
388 | if (read_slab_obj(s, "free_calls")) | 391 | if (read_slab_obj(s, "free_calls")) |
389 | printf(buffer); | 392 | printf("%s", buffer); |
390 | else | 393 | else |
391 | printf("No Data\n"); | 394 | printf("No Data\n"); |
392 | 395 | ||
@@ -400,7 +403,7 @@ static void ops(struct slabinfo *s) | |||
400 | if (read_slab_obj(s, "ops")) { | 403 | if (read_slab_obj(s, "ops")) { |
401 | printf("\n%s: kmem_cache operations\n", s->name); | 404 | printf("\n%s: kmem_cache operations\n", s->name); |
402 | printf("--------------------------------------------\n"); | 405 | printf("--------------------------------------------\n"); |
403 | printf(buffer); | 406 | printf("%s", buffer); |
404 | } else | 407 | } else |
405 | printf("\n%s has no kmem_cache operations\n", s->name); | 408 | printf("\n%s has no kmem_cache operations\n", s->name); |
406 | } | 409 | } |
@@ -462,19 +465,32 @@ static void slab_stats(struct slabinfo *s) | |||
462 | if (s->cpuslab_flush) | 465 | if (s->cpuslab_flush) |
463 | printf("Flushes %8lu\n", s->cpuslab_flush); | 466 | printf("Flushes %8lu\n", s->cpuslab_flush); |
464 | 467 | ||
465 | if (s->alloc_refill) | ||
466 | printf("Refill %8lu\n", s->alloc_refill); | ||
467 | |||
468 | total = s->deactivate_full + s->deactivate_empty + | 468 | total = s->deactivate_full + s->deactivate_empty + |
469 | s->deactivate_to_head + s->deactivate_to_tail; | 469 | s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass; |
470 | 470 | ||
471 | if (total) | 471 | if (total) { |
472 | printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) " | 472 | printf("\nSlab Deactivation Ocurrences %%\n"); |
473 | "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n", | 473 | printf("-------------------------------------------------\n"); |
474 | s->deactivate_full, (s->deactivate_full * 100) / total, | 474 | printf("Slab full %7lu %3lu%%\n", |
475 | s->deactivate_empty, (s->deactivate_empty * 100) / total, | 475 | s->deactivate_full, (s->deactivate_full * 100) / total); |
476 | s->deactivate_to_head, (s->deactivate_to_head * 100) / total, | 476 | printf("Slab empty %7lu %3lu%%\n", |
477 | s->deactivate_empty, (s->deactivate_empty * 100) / total); | ||
478 | printf("Moved to head of partial list %7lu %3lu%%\n", | ||
479 | s->deactivate_to_head, (s->deactivate_to_head * 100) / total); | ||
480 | printf("Moved to tail of partial list %7lu %3lu%%\n", | ||
477 | s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); | 481 | s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total); |
482 | printf("Deactivation bypass %7lu %3lu%%\n", | ||
483 | s->deactivate_bypass, (s->deactivate_bypass * 100) / total); | ||
484 | printf("Refilled from foreign frees %7lu %3lu%%\n", | ||
485 | s->alloc_refill, (s->alloc_refill * 100) / total); | ||
486 | printf("Node mismatch %7lu %3lu%%\n", | ||
487 | s->alloc_node_mismatch, (s->alloc_node_mismatch * 100) / total); | ||
488 | } | ||
489 | |||
490 | if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail) | ||
491 | printf("\nCmpxchg_double Looping\n------------------------\n"); | ||
492 | printf("Locked Cmpxchg Double redos %lu\nUnlocked Cmpxchg Double redos %lu\n", | ||
493 | s->cmpxchg_double_fail, s->cmpxchg_double_cpu_fail); | ||
478 | } | 494 | } |
479 | 495 | ||
480 | static void report(struct slabinfo *s) | 496 | static void report(struct slabinfo *s) |
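In the slab_stats() hunk above, the single "Deactivate ..." summary line becomes a small table that also covers the new deactivate_bypass counter, and the refill and node-mismatch counters are folded into the same table, so their percentages are shares of the deactivation total rather than of allocations; the locked and unlocked cmpxchg_double retry counters are reported at the end. A hypothetical helper, not part of the patch, showing the per-row arithmetic with approximate field widths:

    static void print_deactivation_row(const char *label,
                                       unsigned long val, unsigned long total)
    {
            /* Raw counter plus its integer share of the deactivation total. */
            printf("%-29s %7lu %3lu%%\n", label, val,
                   total ? (val * 100) / total : 0);
    }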
@@ -573,12 +589,13 @@ static void slabcache(struct slabinfo *s) | |||
573 | total_alloc = s->alloc_fastpath + s->alloc_slowpath; | 589 | total_alloc = s->alloc_fastpath + s->alloc_slowpath; |
574 | total_free = s->free_fastpath + s->free_slowpath; | 590 | total_free = s->free_fastpath + s->free_slowpath; |
575 | 591 | ||
576 | printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d\n", | 592 | printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d %4ld %4ld\n", |
577 | s->name, s->objects, | 593 | s->name, s->objects, |
578 | total_alloc, total_free, | 594 | total_alloc, total_free, |
579 | total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, | 595 | total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0, |
580 | total_free ? (s->free_fastpath * 100 / total_free) : 0, | 596 | total_free ? (s->free_fastpath * 100 / total_free) : 0, |
581 | s->order_fallback, s->order); | 597 | s->order_fallback, s->order, s->cmpxchg_double_fail, |
598 | s->cmpxchg_double_cpu_fail); | ||
582 | } | 599 | } |
583 | else | 600 | else |
584 | printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", | 601 | printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n", |
@@ -1190,6 +1207,10 @@ static void read_slab_dir(void) | |||
1190 | slab->deactivate_to_tail = get_obj("deactivate_to_tail"); | 1207 | slab->deactivate_to_tail = get_obj("deactivate_to_tail"); |
1191 | slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); | 1208 | slab->deactivate_remote_frees = get_obj("deactivate_remote_frees"); |
1192 | slab->order_fallback = get_obj("order_fallback"); | 1209 | slab->order_fallback = get_obj("order_fallback"); |
1210 | slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail"); | ||
1211 | slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail"); | ||
1212 | slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); | ||
1213 | slab->deactivate_bypass = get_obj("deactivate_bypass"); | ||
1193 | chdir(".."); | 1214 | chdir(".."); |
1194 | if (slab->name[0] == ':') | 1215 | if (slab->name[0] == ':') |
1195 | alias_targets++; | 1216 | alias_targets++; |