author     Linus Torvalds <torvalds@linux-foundation.org>  2013-11-22 11:10:34 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-11-22 11:10:34 -0500
commit     24f971abbda045c24d5d6f2438a7785567d2fde9 (patch)
tree       a4df2b80eafa1199625b53464bcf34e786a03a28
parent     3bab0bf045e1cc4880e2cfc9351e52cf7ec8e35e (diff)
parent     721ae22ae1a51c25871b7a0b543877aa94ff2a20 (diff)
Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
Pull SLAB changes from Pekka Enberg:
"The patches from Joonsoo Kim switch mm/slab.c to use 'struct page' for
slab internals similar to mm/slub.c. This reduces memory usage and
improves performance:
https://lkml.org/lkml/2013/10/16/155
Rest of the changes are bug fixes from various people"
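The series below can be read against one simple picture: mm/slab.c stops allocating a separate slab descriptor and keeps the same bookkeeping in the struct page that backs the slab's memory. A rough field-for-field comparison, as a sketch only (the field names follow the diff; the layout and types here are simplified stand-ins, not the kernel's definitions):

/* Sketch only; simplified stand-ins, not the kernel's definitions. */

/* Before: a per-slab descriptor, kept on-slab or in a separate cache. */
struct old_slab_desc {
    void *list_next, *list_prev;  /* linkage on the full/partial/free lists */
    unsigned long colouroff;      /* colour offset of the first object */
    void *s_mem;                  /* address of the first object */
    unsigned int inuse;           /* objects currently allocated */
    unsigned int free;            /* head of the kmem_bufctl_t free chain */
    unsigned short nodeid;        /* NUMA node the slab came from */
};

/* After: the backing struct page carries the same state directly. */
struct page_slab_fields {
    void *s_mem;                  /* address of the first object */
    void *freelist;               /* array of free object indices */
    unsigned int active;          /* objects currently allocated */
    /* list linkage reuses page->lru; the node comes from the page itself */
};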
* 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux: (21 commits)
mm, slub: fix the typo in mm/slub.c
mm, slub: fix the typo in include/linux/slub_def.h
slub: Handle NULL parameter in kmem_cache_flags
slab: replace non-existing 'struct freelist *' with 'void *'
slab: fix to calm down kmemleak warning
slub: proper kmemleak tracking if CONFIG_SLUB_DEBUG disabled
slab: rename slab_bufctl to slab_freelist
slab: remove useless statement for checking pfmemalloc
slab: use struct page for slab management
slab: replace free and inuse in struct slab with newly introduced active
slab: remove SLAB_LIMIT
slab: remove kmem_bufctl_t
slab: change the management method of free objects of the slab
slab: use __GFP_COMP flag for allocating slab pages
slab: use well-defined macro, virt_to_slab()
slab: overloading the RCU head over the LRU for RCU free
slab: remove cachep in struct slab_rcu
slab: remove nodeid in struct slab
slab: remove colouroff in struct slab
slab: change return type of kmem_getpages() to struct page
...
 include/linux/mm_types.h |  24
 include/linux/slab.h     |   9
 include/linux/slab_def.h |   4
 include/linux/slub_def.h |   2
 mm/slab.c                | 571
 mm/slub.c                |  45
 6 files changed, 280 insertions(+), 375 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 011eb85d7b0f..bd299418a934 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -44,18 +44,22 @@ struct page { | |||
44 | /* First double word block */ | 44 | /* First double word block */ |
45 | unsigned long flags; /* Atomic flags, some possibly | 45 | unsigned long flags; /* Atomic flags, some possibly |
46 | * updated asynchronously */ | 46 | * updated asynchronously */ |
47 | struct address_space *mapping; /* If low bit clear, points to | 47 | union { |
48 | * inode address_space, or NULL. | 48 | struct address_space *mapping; /* If low bit clear, points to |
49 | * If page mapped as anonymous | 49 | * inode address_space, or NULL. |
50 | * memory, low bit is set, and | 50 | * If page mapped as anonymous |
51 | * it points to anon_vma object: | 51 | * memory, low bit is set, and |
52 | * see PAGE_MAPPING_ANON below. | 52 | * it points to anon_vma object: |
53 | */ | 53 | * see PAGE_MAPPING_ANON below. |
54 | */ | ||
55 | void *s_mem; /* slab first object */ | ||
56 | }; | ||
57 | |||
54 | /* Second double word */ | 58 | /* Second double word */ |
55 | struct { | 59 | struct { |
56 | union { | 60 | union { |
57 | pgoff_t index; /* Our offset within mapping. */ | 61 | pgoff_t index; /* Our offset within mapping. */ |
58 | void *freelist; /* slub/slob first free object */ | 62 | void *freelist; /* sl[aou]b first free object */ |
59 | bool pfmemalloc; /* If set by the page allocator, | 63 | bool pfmemalloc; /* If set by the page allocator, |
60 | * ALLOC_NO_WATERMARKS was set | 64 | * ALLOC_NO_WATERMARKS was set |
61 | * and the low watermark was not | 65 | * and the low watermark was not |
@@ -111,6 +115,7 @@ struct page { | |||
111 | }; | 115 | }; |
112 | atomic_t _count; /* Usage count, see below. */ | 116 | atomic_t _count; /* Usage count, see below. */ |
113 | }; | 117 | }; |
118 | unsigned int active; /* SLAB */ | ||
114 | }; | 119 | }; |
115 | }; | 120 | }; |
116 | 121 | ||
@@ -132,6 +137,9 @@ struct page { | |||
132 | 137 | ||
133 | struct list_head list; /* slobs list of pages */ | 138 | struct list_head list; /* slobs list of pages */ |
134 | struct slab *slab_page; /* slab fields */ | 139 | struct slab *slab_page; /* slab fields */ |
140 | struct rcu_head rcu_head; /* Used by SLAB | ||
141 | * when destroying via RCU | ||
142 | */ | ||
135 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS | 143 | #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS |
136 | pgtable_t pmd_huge_pte; /* protected by page->ptl */ | 144 | pgtable_t pmd_huge_pte; /* protected by page->ptl */ |
137 | #endif | 145 | #endif |
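The hunk above overlays the new SLAB fields on storage struct page already has: s_mem shares a word with mapping, active shares the counter slot, and the RCU head is laid over the lru linkage (which is why kmem_cache_init later gains a BUILD_BUG_ON comparing their sizes). A minimal user-space model of that overlay, with stand-in types rather than the real struct page:

#include <stdio.h>
#include <stddef.h>

/* Stand-in types, only to show the overlay idea. */
struct list_head_model { struct list_head_model *next, *prev; };
struct rcu_head_model  { struct rcu_head_model *next; void (*func)(struct rcu_head_model *); };

struct page_model {
    unsigned long flags;
    union {                              /* one slot, two interpretations */
        void *mapping;                   /* pagecache / anon use */
        void *s_mem;                     /* SLAB: first object in the slab */
    };
    union {                              /* two words, two interpretations */
        struct list_head_model lru;      /* normal list linkage */
        struct rcu_head_model rcu_head;  /* SLAB: deferred free via RCU */
    };
};

int main(void)
{
    /* The overlapping members really do share storage ... */
    printf("mapping at %zu, s_mem at %zu\n",
           offsetof(struct page_model, mapping),
           offsetof(struct page_model, s_mem));
    /* ... which only works because lru is at least as big as rcu_head. */
    printf("sizeof(lru)=%zu, sizeof(rcu_head)=%zu\n",
           sizeof(struct list_head_model), sizeof(struct rcu_head_model));
    return 0;
}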
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 74f105847d13..c2bba248fa63 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -53,7 +53,14 @@ | |||
53 | * } | 53 | * } |
54 | * rcu_read_unlock(); | 54 | * rcu_read_unlock(); |
55 | * | 55 | * |
56 | * See also the comment on struct slab_rcu in mm/slab.c. | 56 | * This is useful if we need to approach a kernel structure obliquely, |
57 | * from its address obtained without the usual locking. We can lock | ||
58 | * the structure to stabilize it and check it's still at the given address, | ||
59 | * only if we can be sure that the memory has not been meanwhile reused | ||
60 | * for some other kind of object (which our subsystem's lock might corrupt). | ||
61 | * | ||
62 | * rcu_read_lock before reading the address, then rcu_read_unlock after | ||
63 | * taking the spinlock within the structure expected at that address. | ||
57 | */ | 64 | */ |
58 | #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ | 65 | #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ |
59 | #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ | 66 | #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ |
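The replacement comment above spells out the usual SLAB_DESTROY_BY_RCU pattern: read the address under rcu_read_lock(), take the lock inside the structure you expect to find there, and only then re-check that it is still the object you wanted, relying on the cache not handing the memory to any other kind of object before a grace period. A hedged sketch of that shape; my_obj, its key field and try_pin() are invented placeholders, and only rcu_read_lock()/rcu_read_unlock() and the spinlock calls are real kernel primitives:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

/* Placeholder object type; 'key' and 'lock' are illustrative only. */
struct my_obj {
    spinlock_t lock;
    unsigned long key;
};

/*
 * 'ptr' was found without holding a reference.  With SLAB_DESTROY_BY_RCU
 * the memory cannot go back to the page allocator, or be reused for a
 * different kind of object, before a grace period, so it is safe to lock
 * it and then re-validate it.
 */
static struct my_obj *try_pin(struct my_obj *ptr, unsigned long key)
{
    struct my_obj *obj = NULL;

    rcu_read_lock();
    spin_lock(&ptr->lock);
    if (ptr->key == key)        /* still the object we were looking for? */
        obj = ptr;
    else
        spin_unlock(&ptr->lock);
    rcu_read_unlock();

    return obj;                 /* returned locked, or NULL */
}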
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index e9346b4f1ef4..09bfffb08a56 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -27,8 +27,8 @@ struct kmem_cache { | |||
27 | 27 | ||
28 | size_t colour; /* cache colouring range */ | 28 | size_t colour; /* cache colouring range */ |
29 | unsigned int colour_off; /* colour offset */ | 29 | unsigned int colour_off; /* colour offset */ |
30 | struct kmem_cache *slabp_cache; | 30 | struct kmem_cache *freelist_cache; |
31 | unsigned int slab_size; | 31 | unsigned int freelist_size; |
32 | 32 | ||
33 | /* constructor func */ | 33 | /* constructor func */ |
34 | void (*ctor)(void *obj); | 34 | void (*ctor)(void *obj); |
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index cc0b67eada42..f56bfa9e4526 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -11,7 +11,7 @@ | |||
11 | enum stat_item { | 11 | enum stat_item { |
12 | ALLOC_FASTPATH, /* Allocation from cpu slab */ | 12 | ALLOC_FASTPATH, /* Allocation from cpu slab */ |
13 | ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ | 13 | ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */ |
14 | FREE_FASTPATH, /* Free to cpu slub */ | 14 | FREE_FASTPATH, /* Free to cpu slab */ |
15 | FREE_SLOWPATH, /* Freeing not to cpu slab */ | 15 | FREE_SLOWPATH, /* Freeing not to cpu slab */ |
16 | FREE_FROZEN, /* Freeing to frozen slab */ | 16 | FREE_FROZEN, /* Freeing to frozen slab */ |
17 | FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */ | 17 | FREE_ADD_PARTIAL, /* Freeing moves slab to partial list */ |
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -164,72 +164,6 @@
164 | static bool pfmemalloc_active __read_mostly; | 164 | static bool pfmemalloc_active __read_mostly; |
165 | 165 | ||
166 | /* | 166 | /* |
167 | * kmem_bufctl_t: | ||
168 | * | ||
169 | * Bufctl's are used for linking objs within a slab | ||
170 | * linked offsets. | ||
171 | * | ||
172 | * This implementation relies on "struct page" for locating the cache & | ||
173 | * slab an object belongs to. | ||
174 | * This allows the bufctl structure to be small (one int), but limits | ||
175 | * the number of objects a slab (not a cache) can contain when off-slab | ||
176 | * bufctls are used. The limit is the size of the largest general cache | ||
177 | * that does not use off-slab slabs. | ||
178 | * For 32bit archs with 4 kB pages, is this 56. | ||
179 | * This is not serious, as it is only for large objects, when it is unwise | ||
180 | * to have too many per slab. | ||
181 | * Note: This limit can be raised by introducing a general cache whose size | ||
182 | * is less than 512 (PAGE_SIZE<<3), but greater than 256. | ||
183 | */ | ||
184 | |||
185 | typedef unsigned int kmem_bufctl_t; | ||
186 | #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) | ||
187 | #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) | ||
188 | #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) | ||
189 | #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) | ||
190 | |||
191 | /* | ||
192 | * struct slab_rcu | ||
193 | * | ||
194 | * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to | ||
195 | * arrange for kmem_freepages to be called via RCU. This is useful if | ||
196 | * we need to approach a kernel structure obliquely, from its address | ||
197 | * obtained without the usual locking. We can lock the structure to | ||
198 | * stabilize it and check it's still at the given address, only if we | ||
199 | * can be sure that the memory has not been meanwhile reused for some | ||
200 | * other kind of object (which our subsystem's lock might corrupt). | ||
201 | * | ||
202 | * rcu_read_lock before reading the address, then rcu_read_unlock after | ||
203 | * taking the spinlock within the structure expected at that address. | ||
204 | */ | ||
205 | struct slab_rcu { | ||
206 | struct rcu_head head; | ||
207 | struct kmem_cache *cachep; | ||
208 | void *addr; | ||
209 | }; | ||
210 | |||
211 | /* | ||
212 | * struct slab | ||
213 | * | ||
214 | * Manages the objs in a slab. Placed either at the beginning of mem allocated | ||
215 | * for a slab, or allocated from an general cache. | ||
216 | * Slabs are chained into three list: fully used, partial, fully free slabs. | ||
217 | */ | ||
218 | struct slab { | ||
219 | union { | ||
220 | struct { | ||
221 | struct list_head list; | ||
222 | unsigned long colouroff; | ||
223 | void *s_mem; /* including colour offset */ | ||
224 | unsigned int inuse; /* num of objs active in slab */ | ||
225 | kmem_bufctl_t free; | ||
226 | unsigned short nodeid; | ||
227 | }; | ||
228 | struct slab_rcu __slab_cover_slab_rcu; | ||
229 | }; | ||
230 | }; | ||
231 | |||
232 | /* | ||
233 | * struct array_cache | 167 | * struct array_cache |
234 | * | 168 | * |
235 | * Purpose: | 169 | * Purpose: |
@@ -456,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj) | |||
456 | return page->slab_cache; | 390 | return page->slab_cache; |
457 | } | 391 | } |
458 | 392 | ||
459 | static inline struct slab *virt_to_slab(const void *obj) | 393 | static inline void *index_to_obj(struct kmem_cache *cache, struct page *page, |
460 | { | ||
461 | struct page *page = virt_to_head_page(obj); | ||
462 | |||
463 | VM_BUG_ON(!PageSlab(page)); | ||
464 | return page->slab_page; | ||
465 | } | ||
466 | |||
467 | static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, | ||
468 | unsigned int idx) | 394 | unsigned int idx) |
469 | { | 395 | { |
470 | return slab->s_mem + cache->size * idx; | 396 | return page->s_mem + cache->size * idx; |
471 | } | 397 | } |
472 | 398 | ||
473 | /* | 399 | /* |
@@ -477,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, | |||
477 | * reciprocal_divide(offset, cache->reciprocal_buffer_size) | 403 | * reciprocal_divide(offset, cache->reciprocal_buffer_size) |
478 | */ | 404 | */ |
479 | static inline unsigned int obj_to_index(const struct kmem_cache *cache, | 405 | static inline unsigned int obj_to_index(const struct kmem_cache *cache, |
480 | const struct slab *slab, void *obj) | 406 | const struct page *page, void *obj) |
481 | { | 407 | { |
482 | u32 offset = (obj - slab->s_mem); | 408 | u32 offset = (obj - page->s_mem); |
483 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); | 409 | return reciprocal_divide(offset, cache->reciprocal_buffer_size); |
484 | } | 410 | } |
485 | 411 | ||
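index_to_obj() and obj_to_index() above are plain array arithmetic against page->s_mem; reciprocal_divide() is only a faster way to divide by the object size. A tiny stand-alone model of the same math (ordinary division instead of the reciprocal, names are illustrative):

#include <assert.h>
#include <stddef.h>

/* Minimal stand-ins for the cache and page state used by the helpers. */
struct cache_model { size_t size; };   /* object size, in bytes */
struct page_model  { char *s_mem; };   /* first object in the slab */

static void *index_to_obj(struct cache_model *c, struct page_model *p, unsigned int idx)
{
    return p->s_mem + c->size * idx;
}

static unsigned int obj_to_index(struct cache_model *c, struct page_model *p, void *obj)
{
    return (unsigned int)(((char *)obj - p->s_mem) / c->size);
}

int main(void)
{
    static char slab[4096];
    struct cache_model c = { .size = 256 };
    struct page_model  p = { .s_mem = slab };

    void *obj = index_to_obj(&c, &p, 5);     /* 5 * 256 = offset 1280 */
    assert(obj_to_index(&c, &p, obj) == 5);  /* and back again */
    return 0;
}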
@@ -641,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
641 | 567 | ||
642 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) | 568 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) |
643 | { | 569 | { |
644 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); | 570 | return ALIGN(nr_objs * sizeof(unsigned int), align); |
645 | } | 571 | } |
646 | 572 | ||
647 | /* | 573 | /* |
@@ -660,8 +586,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
660 | * on it. For the latter case, the memory allocated for a | 586 | * on it. For the latter case, the memory allocated for a |
661 | * slab is used for: | 587 | * slab is used for: |
662 | * | 588 | * |
663 | * - The struct slab | 589 | * - One unsigned int for each object |
664 | * - One kmem_bufctl_t for each object | ||
665 | * - Padding to respect alignment of @align | 590 | * - Padding to respect alignment of @align |
666 | * - @buffer_size bytes for each object | 591 | * - @buffer_size bytes for each object |
667 | * | 592 | * |
@@ -674,8 +599,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
674 | mgmt_size = 0; | 599 | mgmt_size = 0; |
675 | nr_objs = slab_size / buffer_size; | 600 | nr_objs = slab_size / buffer_size; |
676 | 601 | ||
677 | if (nr_objs > SLAB_LIMIT) | ||
678 | nr_objs = SLAB_LIMIT; | ||
679 | } else { | 602 | } else { |
680 | /* | 603 | /* |
681 | * Ignore padding for the initial guess. The padding | 604 | * Ignore padding for the initial guess. The padding |
@@ -685,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
685 | * into the memory allocation when taking the padding | 608 | * into the memory allocation when taking the padding |
686 | * into account. | 609 | * into account. |
687 | */ | 610 | */ |
688 | nr_objs = (slab_size - sizeof(struct slab)) / | 611 | nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int)); |
689 | (buffer_size + sizeof(kmem_bufctl_t)); | ||
690 | 612 | ||
691 | /* | 613 | /* |
692 | * This calculated number will be either the right | 614 | * This calculated number will be either the right |
@@ -696,9 +618,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
696 | > slab_size) | 618 | > slab_size) |
697 | nr_objs--; | 619 | nr_objs--; |
698 | 620 | ||
699 | if (nr_objs > SLAB_LIMIT) | ||
700 | nr_objs = SLAB_LIMIT; | ||
701 | |||
702 | mgmt_size = slab_mgmt_size(nr_objs, align); | 621 | mgmt_size = slab_mgmt_size(nr_objs, align); |
703 | } | 622 | } |
704 | *num = nr_objs; | 623 | *num = nr_objs; |
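With struct slab gone, the on-slab management area shrinks to one unsigned int per object, which is all the arithmetic in slab_mgmt_size() and cache_estimate() now has to account for. A rough worked example under assumed inputs (a 4096-byte order-0 slab, 256-byte objects, 64-byte freelist alignment; these numbers are illustrative, not taken from the patch):

#include <stdio.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) / (a) * (a))

int main(void)
{
    unsigned long slab_size   = 4096;   /* one order-0 page (assumed) */
    unsigned long buffer_size = 256;    /* object size (assumed) */
    unsigned long align       = 64;     /* freelist alignment (assumed) */

    /* First guess ignores the alignment padding of the freelist. */
    unsigned long nr_objs = slab_size / (buffer_size + sizeof(unsigned int));
    unsigned long mgmt    = ALIGN_UP(nr_objs * sizeof(unsigned int), align);

    /* The guess may be one too high once padding is added back in. */
    if (nr_objs * buffer_size + mgmt > slab_size) {
        nr_objs--;
        mgmt = ALIGN_UP(nr_objs * sizeof(unsigned int), align);
    }

    printf("%lu objects, %lu bytes of freelist, %lu bytes left over\n",
           nr_objs, mgmt, slab_size - nr_objs * buffer_size - mgmt);
    return 0;
}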
@@ -829,10 +748,8 @@ static struct array_cache *alloc_arraycache(int node, int entries, | |||
829 | return nc; | 748 | return nc; |
830 | } | 749 | } |
831 | 750 | ||
832 | static inline bool is_slab_pfmemalloc(struct slab *slabp) | 751 | static inline bool is_slab_pfmemalloc(struct page *page) |
833 | { | 752 | { |
834 | struct page *page = virt_to_page(slabp->s_mem); | ||
835 | |||
836 | return PageSlabPfmemalloc(page); | 753 | return PageSlabPfmemalloc(page); |
837 | } | 754 | } |
838 | 755 | ||
@@ -841,23 +758,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep, | |||
841 | struct array_cache *ac) | 758 | struct array_cache *ac) |
842 | { | 759 | { |
843 | struct kmem_cache_node *n = cachep->node[numa_mem_id()]; | 760 | struct kmem_cache_node *n = cachep->node[numa_mem_id()]; |
844 | struct slab *slabp; | 761 | struct page *page; |
845 | unsigned long flags; | 762 | unsigned long flags; |
846 | 763 | ||
847 | if (!pfmemalloc_active) | 764 | if (!pfmemalloc_active) |
848 | return; | 765 | return; |
849 | 766 | ||
850 | spin_lock_irqsave(&n->list_lock, flags); | 767 | spin_lock_irqsave(&n->list_lock, flags); |
851 | list_for_each_entry(slabp, &n->slabs_full, list) | 768 | list_for_each_entry(page, &n->slabs_full, lru) |
852 | if (is_slab_pfmemalloc(slabp)) | 769 | if (is_slab_pfmemalloc(page)) |
853 | goto out; | 770 | goto out; |
854 | 771 | ||
855 | list_for_each_entry(slabp, &n->slabs_partial, list) | 772 | list_for_each_entry(page, &n->slabs_partial, lru) |
856 | if (is_slab_pfmemalloc(slabp)) | 773 | if (is_slab_pfmemalloc(page)) |
857 | goto out; | 774 | goto out; |
858 | 775 | ||
859 | list_for_each_entry(slabp, &n->slabs_free, list) | 776 | list_for_each_entry(page, &n->slabs_free, lru) |
860 | if (is_slab_pfmemalloc(slabp)) | 777 | if (is_slab_pfmemalloc(page)) |
861 | goto out; | 778 | goto out; |
862 | 779 | ||
863 | pfmemalloc_active = false; | 780 | pfmemalloc_active = false; |
@@ -897,8 +814,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, | |||
897 | */ | 814 | */ |
898 | n = cachep->node[numa_mem_id()]; | 815 | n = cachep->node[numa_mem_id()]; |
899 | if (!list_empty(&n->slabs_free) && force_refill) { | 816 | if (!list_empty(&n->slabs_free) && force_refill) { |
900 | struct slab *slabp = virt_to_slab(objp); | 817 | struct page *page = virt_to_head_page(objp); |
901 | ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); | 818 | ClearPageSlabPfmemalloc(page); |
902 | clear_obj_pfmemalloc(&objp); | 819 | clear_obj_pfmemalloc(&objp); |
903 | recheck_pfmemalloc_active(cachep, ac); | 820 | recheck_pfmemalloc_active(cachep, ac); |
904 | return objp; | 821 | return objp; |
@@ -1099,8 +1016,7 @@ static void drain_alien_cache(struct kmem_cache *cachep, | |||
1099 | 1016 | ||
1100 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | 1017 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) |
1101 | { | 1018 | { |
1102 | struct slab *slabp = virt_to_slab(objp); | 1019 | int nodeid = page_to_nid(virt_to_page(objp)); |
1103 | int nodeid = slabp->nodeid; | ||
1104 | struct kmem_cache_node *n; | 1020 | struct kmem_cache_node *n; |
1105 | struct array_cache *alien = NULL; | 1021 | struct array_cache *alien = NULL; |
1106 | int node; | 1022 | int node; |
@@ -1111,7 +1027,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
1111 | * Make sure we are not freeing a object from another node to the array | 1027 | * Make sure we are not freeing a object from another node to the array |
1112 | * cache on this cpu. | 1028 | * cache on this cpu. |
1113 | */ | 1029 | */ |
1114 | if (likely(slabp->nodeid == node)) | 1030 | if (likely(nodeid == node)) |
1115 | return 0; | 1031 | return 0; |
1116 | 1032 | ||
1117 | n = cachep->node[node]; | 1033 | n = cachep->node[node]; |
@@ -1512,6 +1428,8 @@ void __init kmem_cache_init(void) | |||
1512 | { | 1428 | { |
1513 | int i; | 1429 | int i; |
1514 | 1430 | ||
1431 | BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) < | ||
1432 | sizeof(struct rcu_head)); | ||
1515 | kmem_cache = &kmem_cache_boot; | 1433 | kmem_cache = &kmem_cache_boot; |
1516 | setup_node_pointer(kmem_cache); | 1434 | setup_node_pointer(kmem_cache); |
1517 | 1435 | ||
@@ -1687,7 +1605,7 @@ static noinline void | |||
1687 | slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | 1605 | slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) |
1688 | { | 1606 | { |
1689 | struct kmem_cache_node *n; | 1607 | struct kmem_cache_node *n; |
1690 | struct slab *slabp; | 1608 | struct page *page; |
1691 | unsigned long flags; | 1609 | unsigned long flags; |
1692 | int node; | 1610 | int node; |
1693 | 1611 | ||
@@ -1706,15 +1624,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
1706 | continue; | 1624 | continue; |
1707 | 1625 | ||
1708 | spin_lock_irqsave(&n->list_lock, flags); | 1626 | spin_lock_irqsave(&n->list_lock, flags); |
1709 | list_for_each_entry(slabp, &n->slabs_full, list) { | 1627 | list_for_each_entry(page, &n->slabs_full, lru) { |
1710 | active_objs += cachep->num; | 1628 | active_objs += cachep->num; |
1711 | active_slabs++; | 1629 | active_slabs++; |
1712 | } | 1630 | } |
1713 | list_for_each_entry(slabp, &n->slabs_partial, list) { | 1631 | list_for_each_entry(page, &n->slabs_partial, lru) { |
1714 | active_objs += slabp->inuse; | 1632 | active_objs += page->active; |
1715 | active_slabs++; | 1633 | active_slabs++; |
1716 | } | 1634 | } |
1717 | list_for_each_entry(slabp, &n->slabs_free, list) | 1635 | list_for_each_entry(page, &n->slabs_free, lru) |
1718 | num_slabs++; | 1636 | num_slabs++; |
1719 | 1637 | ||
1720 | free_objects += n->free_objects; | 1638 | free_objects += n->free_objects; |
@@ -1736,19 +1654,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) | |||
1736 | * did not request dmaable memory, we might get it, but that | 1654 | * did not request dmaable memory, we might get it, but that |
1737 | * would be relatively rare and ignorable. | 1655 | * would be relatively rare and ignorable. |
1738 | */ | 1656 | */ |
1739 | static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 1657 | static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, |
1658 | int nodeid) | ||
1740 | { | 1659 | { |
1741 | struct page *page; | 1660 | struct page *page; |
1742 | int nr_pages; | 1661 | int nr_pages; |
1743 | int i; | ||
1744 | |||
1745 | #ifndef CONFIG_MMU | ||
1746 | /* | ||
1747 | * Nommu uses slab's for process anonymous memory allocations, and thus | ||
1748 | * requires __GFP_COMP to properly refcount higher order allocations | ||
1749 | */ | ||
1750 | flags |= __GFP_COMP; | ||
1751 | #endif | ||
1752 | 1662 | ||
1753 | flags |= cachep->allocflags; | 1663 | flags |= cachep->allocflags; |
1754 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1664 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
@@ -1772,12 +1682,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
1772 | else | 1682 | else |
1773 | add_zone_page_state(page_zone(page), | 1683 | add_zone_page_state(page_zone(page), |
1774 | NR_SLAB_UNRECLAIMABLE, nr_pages); | 1684 | NR_SLAB_UNRECLAIMABLE, nr_pages); |
1775 | for (i = 0; i < nr_pages; i++) { | 1685 | __SetPageSlab(page); |
1776 | __SetPageSlab(page + i); | 1686 | if (page->pfmemalloc) |
1777 | 1687 | SetPageSlabPfmemalloc(page); | |
1778 | if (page->pfmemalloc) | ||
1779 | SetPageSlabPfmemalloc(page + i); | ||
1780 | } | ||
1781 | memcg_bind_pages(cachep, cachep->gfporder); | 1688 | memcg_bind_pages(cachep, cachep->gfporder); |
1782 | 1689 | ||
1783 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { | 1690 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { |
@@ -1789,17 +1696,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
1789 | kmemcheck_mark_unallocated_pages(page, nr_pages); | 1696 | kmemcheck_mark_unallocated_pages(page, nr_pages); |
1790 | } | 1697 | } |
1791 | 1698 | ||
1792 | return page_address(page); | 1699 | return page; |
1793 | } | 1700 | } |
1794 | 1701 | ||
1795 | /* | 1702 | /* |
1796 | * Interface to system's page release. | 1703 | * Interface to system's page release. |
1797 | */ | 1704 | */ |
1798 | static void kmem_freepages(struct kmem_cache *cachep, void *addr) | 1705 | static void kmem_freepages(struct kmem_cache *cachep, struct page *page) |
1799 | { | 1706 | { |
1800 | unsigned long i = (1 << cachep->gfporder); | 1707 | const unsigned long nr_freed = (1 << cachep->gfporder); |
1801 | struct page *page = virt_to_page(addr); | ||
1802 | const unsigned long nr_freed = i; | ||
1803 | 1708 | ||
1804 | kmemcheck_free_shadow(page, cachep->gfporder); | 1709 | kmemcheck_free_shadow(page, cachep->gfporder); |
1805 | 1710 | ||
@@ -1809,27 +1714,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
1809 | else | 1714 | else |
1810 | sub_zone_page_state(page_zone(page), | 1715 | sub_zone_page_state(page_zone(page), |
1811 | NR_SLAB_UNRECLAIMABLE, nr_freed); | 1716 | NR_SLAB_UNRECLAIMABLE, nr_freed); |
1812 | while (i--) { | 1717 | |
1813 | BUG_ON(!PageSlab(page)); | 1718 | BUG_ON(!PageSlab(page)); |
1814 | __ClearPageSlabPfmemalloc(page); | 1719 | __ClearPageSlabPfmemalloc(page); |
1815 | __ClearPageSlab(page); | 1720 | __ClearPageSlab(page); |
1816 | page++; | 1721 | page_mapcount_reset(page); |
1817 | } | 1722 | page->mapping = NULL; |
1818 | 1723 | ||
1819 | memcg_release_pages(cachep, cachep->gfporder); | 1724 | memcg_release_pages(cachep, cachep->gfporder); |
1820 | if (current->reclaim_state) | 1725 | if (current->reclaim_state) |
1821 | current->reclaim_state->reclaimed_slab += nr_freed; | 1726 | current->reclaim_state->reclaimed_slab += nr_freed; |
1822 | free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder); | 1727 | __free_memcg_kmem_pages(page, cachep->gfporder); |
1823 | } | 1728 | } |
1824 | 1729 | ||
1825 | static void kmem_rcu_free(struct rcu_head *head) | 1730 | static void kmem_rcu_free(struct rcu_head *head) |
1826 | { | 1731 | { |
1827 | struct slab_rcu *slab_rcu = (struct slab_rcu *)head; | 1732 | struct kmem_cache *cachep; |
1828 | struct kmem_cache *cachep = slab_rcu->cachep; | 1733 | struct page *page; |
1829 | 1734 | ||
1830 | kmem_freepages(cachep, slab_rcu->addr); | 1735 | page = container_of(head, struct page, rcu_head); |
1831 | if (OFF_SLAB(cachep)) | 1736 | cachep = page->slab_cache; |
1832 | kmem_cache_free(cachep->slabp_cache, slab_rcu); | 1737 | |
1738 | kmem_freepages(cachep, page); | ||
1833 | } | 1739 | } |
1834 | 1740 | ||
1835 | #if DEBUG | 1741 | #if DEBUG |
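kmem_rcu_free() above no longer needs the struct slab_rcu trampoline: the rcu_head sits inside struct page itself (overlaid on lru), so container_of() gets back to the page and page->slab_cache gets back to the cache. A small user-space model of that recovery step, with stand-in types rather than the kernel's:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head_model { struct rcu_head_model *next; void (*func)(struct rcu_head_model *); };

struct page_model {
    void *slab_cache;                /* which kmem_cache owns this slab */
    struct rcu_head_model rcu_head;  /* overlaid on the lru linkage */
};

/* The RCU callback receives only the rcu_head; the rest is recovered. */
static void kmem_rcu_free_model(struct rcu_head_model *head)
{
    struct page_model *page = container_of(head, struct page_model, rcu_head);

    printf("freeing slab page %p owned by cache %p\n",
           (void *)page, page->slab_cache);
}

int main(void)
{
    static int dummy_cache;          /* stands in for a struct kmem_cache */
    struct page_model page = { .slab_cache = &dummy_cache };

    kmem_rcu_free_model(&page.rcu_head);   /* as call_rcu() would do later */
    return 0;
}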
@@ -1978,19 +1884,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
1978 | /* Print some data about the neighboring objects, if they | 1884 | /* Print some data about the neighboring objects, if they |
1979 | * exist: | 1885 | * exist: |
1980 | */ | 1886 | */ |
1981 | struct slab *slabp = virt_to_slab(objp); | 1887 | struct page *page = virt_to_head_page(objp); |
1982 | unsigned int objnr; | 1888 | unsigned int objnr; |
1983 | 1889 | ||
1984 | objnr = obj_to_index(cachep, slabp, objp); | 1890 | objnr = obj_to_index(cachep, page, objp); |
1985 | if (objnr) { | 1891 | if (objnr) { |
1986 | objp = index_to_obj(cachep, slabp, objnr - 1); | 1892 | objp = index_to_obj(cachep, page, objnr - 1); |
1987 | realobj = (char *)objp + obj_offset(cachep); | 1893 | realobj = (char *)objp + obj_offset(cachep); |
1988 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", | 1894 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", |
1989 | realobj, size); | 1895 | realobj, size); |
1990 | print_objinfo(cachep, objp, 2); | 1896 | print_objinfo(cachep, objp, 2); |
1991 | } | 1897 | } |
1992 | if (objnr + 1 < cachep->num) { | 1898 | if (objnr + 1 < cachep->num) { |
1993 | objp = index_to_obj(cachep, slabp, objnr + 1); | 1899 | objp = index_to_obj(cachep, page, objnr + 1); |
1994 | realobj = (char *)objp + obj_offset(cachep); | 1900 | realobj = (char *)objp + obj_offset(cachep); |
1995 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", | 1901 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", |
1996 | realobj, size); | 1902 | realobj, size); |
@@ -2001,11 +1907,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
2001 | #endif | 1907 | #endif |
2002 | 1908 | ||
2003 | #if DEBUG | 1909 | #if DEBUG |
2004 | static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) | 1910 | static void slab_destroy_debugcheck(struct kmem_cache *cachep, |
1911 | struct page *page) | ||
2005 | { | 1912 | { |
2006 | int i; | 1913 | int i; |
2007 | for (i = 0; i < cachep->num; i++) { | 1914 | for (i = 0; i < cachep->num; i++) { |
2008 | void *objp = index_to_obj(cachep, slabp, i); | 1915 | void *objp = index_to_obj(cachep, page, i); |
2009 | 1916 | ||
2010 | if (cachep->flags & SLAB_POISON) { | 1917 | if (cachep->flags & SLAB_POISON) { |
2011 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1918 | #ifdef CONFIG_DEBUG_PAGEALLOC |
@@ -2030,7 +1937,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab | |||
2030 | } | 1937 | } |
2031 | } | 1938 | } |
2032 | #else | 1939 | #else |
2033 | static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) | 1940 | static void slab_destroy_debugcheck(struct kmem_cache *cachep, |
1941 | struct page *page) | ||
2034 | { | 1942 | { |
2035 | } | 1943 | } |
2036 | #endif | 1944 | #endif |
@@ -2044,23 +1952,34 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab | |||
2044 | * Before calling the slab must have been unlinked from the cache. The | 1952 | * Before calling the slab must have been unlinked from the cache. The |
2045 | * cache-lock is not held/needed. | 1953 | * cache-lock is not held/needed. |
2046 | */ | 1954 | */ |
2047 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | 1955 | static void slab_destroy(struct kmem_cache *cachep, struct page *page) |
2048 | { | 1956 | { |
2049 | void *addr = slabp->s_mem - slabp->colouroff; | 1957 | void *freelist; |
2050 | 1958 | ||
2051 | slab_destroy_debugcheck(cachep, slabp); | 1959 | freelist = page->freelist; |
1960 | slab_destroy_debugcheck(cachep, page); | ||
2052 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { | 1961 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { |
2053 | struct slab_rcu *slab_rcu; | 1962 | struct rcu_head *head; |
1963 | |||
1964 | /* | ||
1965 | * RCU free overloads the RCU head over the LRU. | ||
1966 | * slab_page has been overloeaded over the LRU, | ||
1967 | * however it is not used from now on so that | ||
1968 | * we can use it safely. | ||
1969 | */ | ||
1970 | head = (void *)&page->rcu_head; | ||
1971 | call_rcu(head, kmem_rcu_free); | ||
2054 | 1972 | ||
2055 | slab_rcu = (struct slab_rcu *)slabp; | ||
2056 | slab_rcu->cachep = cachep; | ||
2057 | slab_rcu->addr = addr; | ||
2058 | call_rcu(&slab_rcu->head, kmem_rcu_free); | ||
2059 | } else { | 1973 | } else { |
2060 | kmem_freepages(cachep, addr); | 1974 | kmem_freepages(cachep, page); |
2061 | if (OFF_SLAB(cachep)) | ||
2062 | kmem_cache_free(cachep->slabp_cache, slabp); | ||
2063 | } | 1975 | } |
1976 | |||
1977 | /* | ||
1978 | * From now on, we don't use freelist | ||
1979 | * although actual page can be freed in rcu context | ||
1980 | */ | ||
1981 | if (OFF_SLAB(cachep)) | ||
1982 | kmem_cache_free(cachep->freelist_cache, freelist); | ||
2064 | } | 1983 | } |
2065 | 1984 | ||
2066 | /** | 1985 | /** |
@@ -2097,8 +2016,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
2097 | * use off-slab slabs. Needed to avoid a possible | 2016 | * use off-slab slabs. Needed to avoid a possible |
2098 | * looping condition in cache_grow(). | 2017 | * looping condition in cache_grow(). |
2099 | */ | 2018 | */ |
2100 | offslab_limit = size - sizeof(struct slab); | 2019 | offslab_limit = size; |
2101 | offslab_limit /= sizeof(kmem_bufctl_t); | 2020 | offslab_limit /= sizeof(unsigned int); |
2102 | 2021 | ||
2103 | if (num > offslab_limit) | 2022 | if (num > offslab_limit) |
2104 | break; | 2023 | break; |
@@ -2220,7 +2139,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) | |||
2220 | int | 2139 | int |
2221 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | 2140 | __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) |
2222 | { | 2141 | { |
2223 | size_t left_over, slab_size, ralign; | 2142 | size_t left_over, freelist_size, ralign; |
2224 | gfp_t gfp; | 2143 | gfp_t gfp; |
2225 | int err; | 2144 | int err; |
2226 | size_t size = cachep->size; | 2145 | size_t size = cachep->size; |
@@ -2339,22 +2258,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2339 | if (!cachep->num) | 2258 | if (!cachep->num) |
2340 | return -E2BIG; | 2259 | return -E2BIG; |
2341 | 2260 | ||
2342 | slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) | 2261 | freelist_size = |
2343 | + sizeof(struct slab), cachep->align); | 2262 | ALIGN(cachep->num * sizeof(unsigned int), cachep->align); |
2344 | 2263 | ||
2345 | /* | 2264 | /* |
2346 | * If the slab has been placed off-slab, and we have enough space then | 2265 | * If the slab has been placed off-slab, and we have enough space then |
2347 | * move it on-slab. This is at the expense of any extra colouring. | 2266 | * move it on-slab. This is at the expense of any extra colouring. |
2348 | */ | 2267 | */ |
2349 | if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { | 2268 | if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) { |
2350 | flags &= ~CFLGS_OFF_SLAB; | 2269 | flags &= ~CFLGS_OFF_SLAB; |
2351 | left_over -= slab_size; | 2270 | left_over -= freelist_size; |
2352 | } | 2271 | } |
2353 | 2272 | ||
2354 | if (flags & CFLGS_OFF_SLAB) { | 2273 | if (flags & CFLGS_OFF_SLAB) { |
2355 | /* really off slab. No need for manual alignment */ | 2274 | /* really off slab. No need for manual alignment */ |
2356 | slab_size = | 2275 | freelist_size = cachep->num * sizeof(unsigned int); |
2357 | cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); | ||
2358 | 2276 | ||
2359 | #ifdef CONFIG_PAGE_POISONING | 2277 | #ifdef CONFIG_PAGE_POISONING |
2360 | /* If we're going to use the generic kernel_map_pages() | 2278 | /* If we're going to use the generic kernel_map_pages() |
@@ -2371,16 +2289,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2371 | if (cachep->colour_off < cachep->align) | 2289 | if (cachep->colour_off < cachep->align) |
2372 | cachep->colour_off = cachep->align; | 2290 | cachep->colour_off = cachep->align; |
2373 | cachep->colour = left_over / cachep->colour_off; | 2291 | cachep->colour = left_over / cachep->colour_off; |
2374 | cachep->slab_size = slab_size; | 2292 | cachep->freelist_size = freelist_size; |
2375 | cachep->flags = flags; | 2293 | cachep->flags = flags; |
2376 | cachep->allocflags = 0; | 2294 | cachep->allocflags = __GFP_COMP; |
2377 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) | 2295 | if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) |
2378 | cachep->allocflags |= GFP_DMA; | 2296 | cachep->allocflags |= GFP_DMA; |
2379 | cachep->size = size; | 2297 | cachep->size = size; |
2380 | cachep->reciprocal_buffer_size = reciprocal_value(size); | 2298 | cachep->reciprocal_buffer_size = reciprocal_value(size); |
2381 | 2299 | ||
2382 | if (flags & CFLGS_OFF_SLAB) { | 2300 | if (flags & CFLGS_OFF_SLAB) { |
2383 | cachep->slabp_cache = kmalloc_slab(slab_size, 0u); | 2301 | cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); |
2384 | /* | 2302 | /* |
2385 | * This is a possibility for one of the malloc_sizes caches. | 2303 | * This is a possibility for one of the malloc_sizes caches. |
2386 | * But since we go off slab only for object size greater than | 2304 | * But since we go off slab only for object size greater than |
@@ -2388,7 +2306,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2388 | * this should not happen at all. | 2306 | * this should not happen at all. |
2389 | * But leave a BUG_ON for some lucky dude. | 2307 | * But leave a BUG_ON for some lucky dude. |
2390 | */ | 2308 | */ |
2391 | BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache)); | 2309 | BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); |
2392 | } | 2310 | } |
2393 | 2311 | ||
2394 | err = setup_cpu_cache(cachep, gfp); | 2312 | err = setup_cpu_cache(cachep, gfp); |
@@ -2494,7 +2412,7 @@ static int drain_freelist(struct kmem_cache *cache, | |||
2494 | { | 2412 | { |
2495 | struct list_head *p; | 2413 | struct list_head *p; |
2496 | int nr_freed; | 2414 | int nr_freed; |
2497 | struct slab *slabp; | 2415 | struct page *page; |
2498 | 2416 | ||
2499 | nr_freed = 0; | 2417 | nr_freed = 0; |
2500 | while (nr_freed < tofree && !list_empty(&n->slabs_free)) { | 2418 | while (nr_freed < tofree && !list_empty(&n->slabs_free)) { |
@@ -2506,18 +2424,18 @@ static int drain_freelist(struct kmem_cache *cache, | |||
2506 | goto out; | 2424 | goto out; |
2507 | } | 2425 | } |
2508 | 2426 | ||
2509 | slabp = list_entry(p, struct slab, list); | 2427 | page = list_entry(p, struct page, lru); |
2510 | #if DEBUG | 2428 | #if DEBUG |
2511 | BUG_ON(slabp->inuse); | 2429 | BUG_ON(page->active); |
2512 | #endif | 2430 | #endif |
2513 | list_del(&slabp->list); | 2431 | list_del(&page->lru); |
2514 | /* | 2432 | /* |
2515 | * Safe to drop the lock. The slab is no longer linked | 2433 | * Safe to drop the lock. The slab is no longer linked |
2516 | * to the cache. | 2434 | * to the cache. |
2517 | */ | 2435 | */ |
2518 | n->free_objects -= cache->num; | 2436 | n->free_objects -= cache->num; |
2519 | spin_unlock_irq(&n->list_lock); | 2437 | spin_unlock_irq(&n->list_lock); |
2520 | slab_destroy(cache, slabp); | 2438 | slab_destroy(cache, page); |
2521 | nr_freed++; | 2439 | nr_freed++; |
2522 | } | 2440 | } |
2523 | out: | 2441 | out: |
@@ -2600,52 +2518,42 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) | |||
2600 | * descriptors in kmem_cache_create, we search through the malloc_sizes array. | 2518 | * descriptors in kmem_cache_create, we search through the malloc_sizes array. |
2601 | * If we are creating a malloc_sizes cache here it would not be visible to | 2519 | * If we are creating a malloc_sizes cache here it would not be visible to |
2602 | * kmem_find_general_cachep till the initialization is complete. | 2520 | * kmem_find_general_cachep till the initialization is complete. |
2603 | * Hence we cannot have slabp_cache same as the original cache. | 2521 | * Hence we cannot have freelist_cache same as the original cache. |
2604 | */ | 2522 | */ |
2605 | static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | 2523 | static void *alloc_slabmgmt(struct kmem_cache *cachep, |
2606 | int colour_off, gfp_t local_flags, | 2524 | struct page *page, int colour_off, |
2607 | int nodeid) | 2525 | gfp_t local_flags, int nodeid) |
2608 | { | 2526 | { |
2609 | struct slab *slabp; | 2527 | void *freelist; |
2528 | void *addr = page_address(page); | ||
2610 | 2529 | ||
2611 | if (OFF_SLAB(cachep)) { | 2530 | if (OFF_SLAB(cachep)) { |
2612 | /* Slab management obj is off-slab. */ | 2531 | /* Slab management obj is off-slab. */ |
2613 | slabp = kmem_cache_alloc_node(cachep->slabp_cache, | 2532 | freelist = kmem_cache_alloc_node(cachep->freelist_cache, |
2614 | local_flags, nodeid); | 2533 | local_flags, nodeid); |
2615 | /* | 2534 | if (!freelist) |
2616 | * If the first object in the slab is leaked (it's allocated | ||
2617 | * but no one has a reference to it), we want to make sure | ||
2618 | * kmemleak does not treat the ->s_mem pointer as a reference | ||
2619 | * to the object. Otherwise we will not report the leak. | ||
2620 | */ | ||
2621 | kmemleak_scan_area(&slabp->list, sizeof(struct list_head), | ||
2622 | local_flags); | ||
2623 | if (!slabp) | ||
2624 | return NULL; | 2535 | return NULL; |
2625 | } else { | 2536 | } else { |
2626 | slabp = objp + colour_off; | 2537 | freelist = addr + colour_off; |
2627 | colour_off += cachep->slab_size; | 2538 | colour_off += cachep->freelist_size; |
2628 | } | 2539 | } |
2629 | slabp->inuse = 0; | 2540 | page->active = 0; |
2630 | slabp->colouroff = colour_off; | 2541 | page->s_mem = addr + colour_off; |
2631 | slabp->s_mem = objp + colour_off; | 2542 | return freelist; |
2632 | slabp->nodeid = nodeid; | ||
2633 | slabp->free = 0; | ||
2634 | return slabp; | ||
2635 | } | 2543 | } |
2636 | 2544 | ||
2637 | static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) | 2545 | static inline unsigned int *slab_freelist(struct page *page) |
2638 | { | 2546 | { |
2639 | return (kmem_bufctl_t *) (slabp + 1); | 2547 | return (unsigned int *)(page->freelist); |
2640 | } | 2548 | } |
2641 | 2549 | ||
2642 | static void cache_init_objs(struct kmem_cache *cachep, | 2550 | static void cache_init_objs(struct kmem_cache *cachep, |
2643 | struct slab *slabp) | 2551 | struct page *page) |
2644 | { | 2552 | { |
2645 | int i; | 2553 | int i; |
2646 | 2554 | ||
2647 | for (i = 0; i < cachep->num; i++) { | 2555 | for (i = 0; i < cachep->num; i++) { |
2648 | void *objp = index_to_obj(cachep, slabp, i); | 2556 | void *objp = index_to_obj(cachep, page, i); |
2649 | #if DEBUG | 2557 | #if DEBUG |
2650 | /* need to poison the objs? */ | 2558 | /* need to poison the objs? */ |
2651 | if (cachep->flags & SLAB_POISON) | 2559 | if (cachep->flags & SLAB_POISON) |
@@ -2681,9 +2589,8 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2681 | if (cachep->ctor) | 2589 | if (cachep->ctor) |
2682 | cachep->ctor(objp); | 2590 | cachep->ctor(objp); |
2683 | #endif | 2591 | #endif |
2684 | slab_bufctl(slabp)[i] = i + 1; | 2592 | slab_freelist(page)[i] = i; |
2685 | } | 2593 | } |
2686 | slab_bufctl(slabp)[i - 1] = BUFCTL_END; | ||
2687 | } | 2594 | } |
2688 | 2595 | ||
2689 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | 2596 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) |
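After this hunk the freelist is simply an array of object indices stored at page->freelist, and cache_init_objs() fills it with 0..num-1; there is no BUFCTL_END terminator any more because page->active records how many entries have been handed out. A minimal stand-alone model of that initialization (NUM_OBJS is an assumed value for cachep->num):

#include <stdio.h>

#define NUM_OBJS 8   /* cachep->num in the real code (assumed value) */

int main(void)
{
    unsigned int freelist[NUM_OBJS];  /* lives at page->freelist in the kernel */
    unsigned int active = 0;          /* page->active: entries handed out */
    unsigned int i;

    /* cache_init_objs(): every slot simply holds its own index. */
    for (i = 0; i < NUM_OBJS; i++)
        freelist[i] = i;

    printf("active=%u, next free index=%u\n", active, freelist[active]);
    return 0;
}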
@@ -2696,41 +2603,41 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | |||
2696 | } | 2603 | } |
2697 | } | 2604 | } |
2698 | 2605 | ||
2699 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, | 2606 | static void *slab_get_obj(struct kmem_cache *cachep, struct page *page, |
2700 | int nodeid) | 2607 | int nodeid) |
2701 | { | 2608 | { |
2702 | void *objp = index_to_obj(cachep, slabp, slabp->free); | 2609 | void *objp; |
2703 | kmem_bufctl_t next; | ||
2704 | 2610 | ||
2705 | slabp->inuse++; | 2611 | objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]); |
2706 | next = slab_bufctl(slabp)[slabp->free]; | 2612 | page->active++; |
2707 | #if DEBUG | 2613 | #if DEBUG |
2708 | slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; | 2614 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); |
2709 | WARN_ON(slabp->nodeid != nodeid); | ||
2710 | #endif | 2615 | #endif |
2711 | slabp->free = next; | ||
2712 | 2616 | ||
2713 | return objp; | 2617 | return objp; |
2714 | } | 2618 | } |
2715 | 2619 | ||
2716 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, | 2620 | static void slab_put_obj(struct kmem_cache *cachep, struct page *page, |
2717 | void *objp, int nodeid) | 2621 | void *objp, int nodeid) |
2718 | { | 2622 | { |
2719 | unsigned int objnr = obj_to_index(cachep, slabp, objp); | 2623 | unsigned int objnr = obj_to_index(cachep, page, objp); |
2720 | |||
2721 | #if DEBUG | 2624 | #if DEBUG |
2625 | unsigned int i; | ||
2626 | |||
2722 | /* Verify that the slab belongs to the intended node */ | 2627 | /* Verify that the slab belongs to the intended node */ |
2723 | WARN_ON(slabp->nodeid != nodeid); | 2628 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); |
2724 | 2629 | ||
2725 | if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { | 2630 | /* Verify double free bug */ |
2726 | printk(KERN_ERR "slab: double free detected in cache " | 2631 | for (i = page->active; i < cachep->num; i++) { |
2727 | "'%s', objp %p\n", cachep->name, objp); | 2632 | if (slab_freelist(page)[i] == objnr) { |
2728 | BUG(); | 2633 | printk(KERN_ERR "slab: double free detected in cache " |
2634 | "'%s', objp %p\n", cachep->name, objp); | ||
2635 | BUG(); | ||
2636 | } | ||
2729 | } | 2637 | } |
2730 | #endif | 2638 | #endif |
2731 | slab_bufctl(slabp)[objnr] = slabp->free; | 2639 | page->active--; |
2732 | slabp->free = objnr; | 2640 | slab_freelist(page)[page->active] = objnr; |
2733 | slabp->inuse--; | ||
2734 | } | 2641 | } |
2735 | 2642 | ||
2736 | /* | 2643 | /* |
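slab_get_obj() and slab_put_obj() above treat that index array like a stack cursor: allocation reads freelist[active] and increments active, freeing decrements active and writes the returned index into the slot it vacated, and the DEBUG double-free check scans the not-yet-allocated tail. A compact user-space model of both operations (illustrative names and sizes):

#include <assert.h>
#include <stdio.h>

#define NUM_OBJS 4

static unsigned int freelist[NUM_OBJS] = { 0, 1, 2, 3 };  /* page->freelist */
static unsigned int active;                               /* page->active */

static unsigned int get_obj(void)        /* slab_get_obj(), minus the pointer math */
{
    assert(active < NUM_OBJS);
    return freelist[active++];
}

static void put_obj(unsigned int objnr)  /* slab_put_obj() */
{
    unsigned int i;

    for (i = active; i < NUM_OBJS; i++)  /* DEBUG double-free check */
        assert(freelist[i] != objnr);

    freelist[--active] = objnr;
}

int main(void)
{
    unsigned int a = get_obj(), b = get_obj();

    put_obj(a);
    put_obj(b);
    printf("active back to %u\n", active);   /* prints 0 */
    return 0;
}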
@@ -2738,23 +2645,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, | |||
2738 | * for the slab allocator to be able to lookup the cache and slab of a | 2645 | * for the slab allocator to be able to lookup the cache and slab of a |
2739 | * virtual address for kfree, ksize, and slab debugging. | 2646 | * virtual address for kfree, ksize, and slab debugging. |
2740 | */ | 2647 | */ |
2741 | static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, | 2648 | static void slab_map_pages(struct kmem_cache *cache, struct page *page, |
2742 | void *addr) | 2649 | void *freelist) |
2743 | { | 2650 | { |
2744 | int nr_pages; | 2651 | page->slab_cache = cache; |
2745 | struct page *page; | 2652 | page->freelist = freelist; |
2746 | |||
2747 | page = virt_to_page(addr); | ||
2748 | |||
2749 | nr_pages = 1; | ||
2750 | if (likely(!PageCompound(page))) | ||
2751 | nr_pages <<= cache->gfporder; | ||
2752 | |||
2753 | do { | ||
2754 | page->slab_cache = cache; | ||
2755 | page->slab_page = slab; | ||
2756 | page++; | ||
2757 | } while (--nr_pages); | ||
2758 | } | 2653 | } |
2759 | 2654 | ||
2760 | /* | 2655 | /* |
@@ -2762,9 +2657,9 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, | |||
2762 | * kmem_cache_alloc() when there are no active objs left in a cache. | 2657 | * kmem_cache_alloc() when there are no active objs left in a cache. |
2763 | */ | 2658 | */ |
2764 | static int cache_grow(struct kmem_cache *cachep, | 2659 | static int cache_grow(struct kmem_cache *cachep, |
2765 | gfp_t flags, int nodeid, void *objp) | 2660 | gfp_t flags, int nodeid, struct page *page) |
2766 | { | 2661 | { |
2767 | struct slab *slabp; | 2662 | void *freelist; |
2768 | size_t offset; | 2663 | size_t offset; |
2769 | gfp_t local_flags; | 2664 | gfp_t local_flags; |
2770 | struct kmem_cache_node *n; | 2665 | struct kmem_cache_node *n; |
@@ -2805,20 +2700,20 @@ static int cache_grow(struct kmem_cache *cachep, | |||
2805 | * Get mem for the objs. Attempt to allocate a physical page from | 2700 | * Get mem for the objs. Attempt to allocate a physical page from |
2806 | * 'nodeid'. | 2701 | * 'nodeid'. |
2807 | */ | 2702 | */ |
2808 | if (!objp) | 2703 | if (!page) |
2809 | objp = kmem_getpages(cachep, local_flags, nodeid); | 2704 | page = kmem_getpages(cachep, local_flags, nodeid); |
2810 | if (!objp) | 2705 | if (!page) |
2811 | goto failed; | 2706 | goto failed; |
2812 | 2707 | ||
2813 | /* Get slab management. */ | 2708 | /* Get slab management. */ |
2814 | slabp = alloc_slabmgmt(cachep, objp, offset, | 2709 | freelist = alloc_slabmgmt(cachep, page, offset, |
2815 | local_flags & ~GFP_CONSTRAINT_MASK, nodeid); | 2710 | local_flags & ~GFP_CONSTRAINT_MASK, nodeid); |
2816 | if (!slabp) | 2711 | if (!freelist) |
2817 | goto opps1; | 2712 | goto opps1; |
2818 | 2713 | ||
2819 | slab_map_pages(cachep, slabp, objp); | 2714 | slab_map_pages(cachep, page, freelist); |
2820 | 2715 | ||
2821 | cache_init_objs(cachep, slabp); | 2716 | cache_init_objs(cachep, page); |
2822 | 2717 | ||
2823 | if (local_flags & __GFP_WAIT) | 2718 | if (local_flags & __GFP_WAIT) |
2824 | local_irq_disable(); | 2719 | local_irq_disable(); |
@@ -2826,13 +2721,13 @@ static int cache_grow(struct kmem_cache *cachep, | |||
2826 | spin_lock(&n->list_lock); | 2721 | spin_lock(&n->list_lock); |
2827 | 2722 | ||
2828 | /* Make slab active. */ | 2723 | /* Make slab active. */ |
2829 | list_add_tail(&slabp->list, &(n->slabs_free)); | 2724 | list_add_tail(&page->lru, &(n->slabs_free)); |
2830 | STATS_INC_GROWN(cachep); | 2725 | STATS_INC_GROWN(cachep); |
2831 | n->free_objects += cachep->num; | 2726 | n->free_objects += cachep->num; |
2832 | spin_unlock(&n->list_lock); | 2727 | spin_unlock(&n->list_lock); |
2833 | return 1; | 2728 | return 1; |
2834 | opps1: | 2729 | opps1: |
2835 | kmem_freepages(cachep, objp); | 2730 | kmem_freepages(cachep, page); |
2836 | failed: | 2731 | failed: |
2837 | if (local_flags & __GFP_WAIT) | 2732 | if (local_flags & __GFP_WAIT) |
2838 | local_irq_disable(); | 2733 | local_irq_disable(); |
@@ -2880,9 +2775,8 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj) | |||
2880 | static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | 2775 | static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, |
2881 | unsigned long caller) | 2776 | unsigned long caller) |
2882 | { | 2777 | { |
2883 | struct page *page; | ||
2884 | unsigned int objnr; | 2778 | unsigned int objnr; |
2885 | struct slab *slabp; | 2779 | struct page *page; |
2886 | 2780 | ||
2887 | BUG_ON(virt_to_cache(objp) != cachep); | 2781 | BUG_ON(virt_to_cache(objp) != cachep); |
2888 | 2782 | ||
@@ -2890,8 +2784,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2890 | kfree_debugcheck(objp); | 2784 | kfree_debugcheck(objp); |
2891 | page = virt_to_head_page(objp); | 2785 | page = virt_to_head_page(objp); |
2892 | 2786 | ||
2893 | slabp = page->slab_page; | ||
2894 | |||
2895 | if (cachep->flags & SLAB_RED_ZONE) { | 2787 | if (cachep->flags & SLAB_RED_ZONE) { |
2896 | verify_redzone_free(cachep, objp); | 2788 | verify_redzone_free(cachep, objp); |
2897 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; | 2789 | *dbg_redzone1(cachep, objp) = RED_INACTIVE; |
@@ -2900,14 +2792,11 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2900 | if (cachep->flags & SLAB_STORE_USER) | 2792 | if (cachep->flags & SLAB_STORE_USER) |
2901 | *dbg_userword(cachep, objp) = (void *)caller; | 2793 | *dbg_userword(cachep, objp) = (void *)caller; |
2902 | 2794 | ||
2903 | objnr = obj_to_index(cachep, slabp, objp); | 2795 | objnr = obj_to_index(cachep, page, objp); |
2904 | 2796 | ||
2905 | BUG_ON(objnr >= cachep->num); | 2797 | BUG_ON(objnr >= cachep->num); |
2906 | BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); | 2798 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); |
2907 | 2799 | ||
2908 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
2909 | slab_bufctl(slabp)[objnr] = BUFCTL_FREE; | ||
2910 | #endif | ||
2911 | if (cachep->flags & SLAB_POISON) { | 2800 | if (cachep->flags & SLAB_POISON) { |
2912 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2801 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2913 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { | 2802 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
@@ -2924,33 +2813,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2924 | return objp; | 2813 | return objp; |
2925 | } | 2814 | } |
2926 | 2815 | ||
2927 | static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | ||
2928 | { | ||
2929 | kmem_bufctl_t i; | ||
2930 | int entries = 0; | ||
2931 | |||
2932 | /* Check slab's freelist to see if this obj is there. */ | ||
2933 | for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { | ||
2934 | entries++; | ||
2935 | if (entries > cachep->num || i >= cachep->num) | ||
2936 | goto bad; | ||
2937 | } | ||
2938 | if (entries != cachep->num - slabp->inuse) { | ||
2939 | bad: | ||
2940 | printk(KERN_ERR "slab: Internal list corruption detected in " | ||
2941 | "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n", | ||
2942 | cachep->name, cachep->num, slabp, slabp->inuse, | ||
2943 | print_tainted()); | ||
2944 | print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp, | ||
2945 | sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t), | ||
2946 | 1); | ||
2947 | BUG(); | ||
2948 | } | ||
2949 | } | ||
2950 | #else | 2816 | #else |
2951 | #define kfree_debugcheck(x) do { } while(0) | 2817 | #define kfree_debugcheck(x) do { } while(0) |
2952 | #define cache_free_debugcheck(x,objp,z) (objp) | 2818 | #define cache_free_debugcheck(x,objp,z) (objp) |
2953 | #define check_slabp(x,y) do { } while(0) | ||
2954 | #endif | 2819 | #endif |
2955 | 2820 | ||
2956 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, | 2821 | static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, |
@@ -2989,7 +2854,7 @@ retry: | |||
2989 | 2854 | ||
2990 | while (batchcount > 0) { | 2855 | while (batchcount > 0) { |
2991 | struct list_head *entry; | 2856 | struct list_head *entry; |
2992 | struct slab *slabp; | 2857 | struct page *page; |
2993 | /* Get slab alloc is to come from. */ | 2858 | /* Get slab alloc is to come from. */ |
2994 | entry = n->slabs_partial.next; | 2859 | entry = n->slabs_partial.next; |
2995 | if (entry == &n->slabs_partial) { | 2860 | if (entry == &n->slabs_partial) { |
@@ -2999,8 +2864,7 @@ retry: | |||
2999 | goto must_grow; | 2864 | goto must_grow; |
3000 | } | 2865 | } |
3001 | 2866 | ||
3002 | slabp = list_entry(entry, struct slab, list); | 2867 | page = list_entry(entry, struct page, lru); |
3003 | check_slabp(cachep, slabp); | ||
3004 | check_spinlock_acquired(cachep); | 2868 | check_spinlock_acquired(cachep); |
3005 | 2869 | ||
3006 | /* | 2870 | /* |
@@ -3008,24 +2872,23 @@ retry: | |||
3008 | * there must be at least one object available for | 2872 | * there must be at least one object available for |
3009 | * allocation. | 2873 | * allocation. |
3010 | */ | 2874 | */ |
3011 | BUG_ON(slabp->inuse >= cachep->num); | 2875 | BUG_ON(page->active >= cachep->num); |
3012 | 2876 | ||
3013 | while (slabp->inuse < cachep->num && batchcount--) { | 2877 | while (page->active < cachep->num && batchcount--) { |
3014 | STATS_INC_ALLOCED(cachep); | 2878 | STATS_INC_ALLOCED(cachep); |
3015 | STATS_INC_ACTIVE(cachep); | 2879 | STATS_INC_ACTIVE(cachep); |
3016 | STATS_SET_HIGH(cachep); | 2880 | STATS_SET_HIGH(cachep); |
3017 | 2881 | ||
3018 | ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, | 2882 | ac_put_obj(cachep, ac, slab_get_obj(cachep, page, |
3019 | node)); | 2883 | node)); |
3020 | } | 2884 | } |
3021 | check_slabp(cachep, slabp); | ||
3022 | 2885 | ||
3023 | /* move slabp to correct slabp list: */ | 2886 | /* move slabp to correct slabp list: */ |
3024 | list_del(&slabp->list); | 2887 | list_del(&page->lru); |
3025 | if (slabp->free == BUFCTL_END) | 2888 | if (page->active == cachep->num) |
3026 | list_add(&slabp->list, &n->slabs_full); | 2889 | list_add(&page->list, &n->slabs_full); |
3027 | else | 2890 | else |
3028 | list_add(&slabp->list, &n->slabs_partial); | 2891 | list_add(&page->list, &n->slabs_partial); |
3029 | } | 2892 | } |
3030 | 2893 | ||
3031 | must_grow: | 2894 | must_grow: |
@@ -3097,16 +2960,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
3097 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 2960 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
3098 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2961 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
3099 | } | 2962 | } |
3100 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
3101 | { | ||
3102 | struct slab *slabp; | ||
3103 | unsigned objnr; | ||
3104 | |||
3105 | slabp = virt_to_head_page(objp)->slab_page; | ||
3106 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->size; | ||
3107 | slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; | ||
3108 | } | ||
3109 | #endif | ||
3110 | objp += obj_offset(cachep); | 2963 | objp += obj_offset(cachep); |
3111 | if (cachep->ctor && cachep->flags & SLAB_POISON) | 2964 | if (cachep->ctor && cachep->flags & SLAB_POISON) |
3112 | cachep->ctor(objp); | 2965 | cachep->ctor(objp); |
@@ -3248,18 +3101,20 @@ retry: | |||
3248 | * We may trigger various forms of reclaim on the allowed | 3101 | * We may trigger various forms of reclaim on the allowed |
3249 | * set and go into memory reserves if necessary. | 3102 | * set and go into memory reserves if necessary. |
3250 | */ | 3103 | */ |
3104 | struct page *page; | ||
3105 | |||
3251 | if (local_flags & __GFP_WAIT) | 3106 | if (local_flags & __GFP_WAIT) |
3252 | local_irq_enable(); | 3107 | local_irq_enable(); |
3253 | kmem_flagcheck(cache, flags); | 3108 | kmem_flagcheck(cache, flags); |
3254 | obj = kmem_getpages(cache, local_flags, numa_mem_id()); | 3109 | page = kmem_getpages(cache, local_flags, numa_mem_id()); |
3255 | if (local_flags & __GFP_WAIT) | 3110 | if (local_flags & __GFP_WAIT) |
3256 | local_irq_disable(); | 3111 | local_irq_disable(); |
3257 | if (obj) { | 3112 | if (page) { |
3258 | /* | 3113 | /* |
3259 | * Insert into the appropriate per node queues | 3114 | * Insert into the appropriate per node queues |
3260 | */ | 3115 | */ |
3261 | nid = page_to_nid(virt_to_page(obj)); | 3116 | nid = page_to_nid(page); |
3262 | if (cache_grow(cache, flags, nid, obj)) { | 3117 | if (cache_grow(cache, flags, nid, page)) { |
3263 | obj = ____cache_alloc_node(cache, | 3118 | obj = ____cache_alloc_node(cache, |
3264 | flags | GFP_THISNODE, nid); | 3119 | flags | GFP_THISNODE, nid); |
3265 | if (!obj) | 3120 | if (!obj) |
@@ -3288,7 +3143,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | |||
3288 | int nodeid) | 3143 | int nodeid) |
3289 | { | 3144 | { |
3290 | struct list_head *entry; | 3145 | struct list_head *entry; |
3291 | struct slab *slabp; | 3146 | struct page *page; |
3292 | struct kmem_cache_node *n; | 3147 | struct kmem_cache_node *n; |
3293 | void *obj; | 3148 | void *obj; |
3294 | int x; | 3149 | int x; |
@@ -3308,26 +3163,24 @@ retry: | |||
3308 | goto must_grow; | 3163 | goto must_grow; |
3309 | } | 3164 | } |
3310 | 3165 | ||
3311 | slabp = list_entry(entry, struct slab, list); | 3166 | page = list_entry(entry, struct page, lru); |
3312 | check_spinlock_acquired_node(cachep, nodeid); | 3167 | check_spinlock_acquired_node(cachep, nodeid); |
3313 | check_slabp(cachep, slabp); | ||
3314 | 3168 | ||
3315 | STATS_INC_NODEALLOCS(cachep); | 3169 | STATS_INC_NODEALLOCS(cachep); |
3316 | STATS_INC_ACTIVE(cachep); | 3170 | STATS_INC_ACTIVE(cachep); |
3317 | STATS_SET_HIGH(cachep); | 3171 | STATS_SET_HIGH(cachep); |
3318 | 3172 | ||
3319 | BUG_ON(slabp->inuse == cachep->num); | 3173 | BUG_ON(page->active == cachep->num); |
3320 | 3174 | ||
3321 | obj = slab_get_obj(cachep, slabp, nodeid); | 3175 | obj = slab_get_obj(cachep, page, nodeid); |
3322 | check_slabp(cachep, slabp); | ||
3323 | n->free_objects--; | 3176 | n->free_objects--; |
3324 | /* move slabp to correct slabp list: */ | 3177 | /* move slabp to correct slabp list: */ |
3325 | list_del(&slabp->list); | 3178 | list_del(&page->lru); |
3326 | 3179 | ||
3327 | if (slabp->free == BUFCTL_END) | 3180 | if (page->active == cachep->num) |
3328 | list_add(&slabp->list, &n->slabs_full); | 3181 | list_add(&page->lru, &n->slabs_full); |
3329 | else | 3182 | else |
3330 | list_add(&slabp->list, &n->slabs_partial); | 3183 | list_add(&page->lru, &n->slabs_partial); |
3331 | 3184 | ||
3332 | spin_unlock(&n->list_lock); | 3185 | spin_unlock(&n->list_lock); |
3333 | goto done; | 3186 | goto done; |
@@ -3477,23 +3330,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | |||
3477 | 3330 | ||
3478 | for (i = 0; i < nr_objects; i++) { | 3331 | for (i = 0; i < nr_objects; i++) { |
3479 | void *objp; | 3332 | void *objp; |
3480 | struct slab *slabp; | 3333 | struct page *page; |
3481 | 3334 | ||
3482 | clear_obj_pfmemalloc(&objpp[i]); | 3335 | clear_obj_pfmemalloc(&objpp[i]); |
3483 | objp = objpp[i]; | 3336 | objp = objpp[i]; |
3484 | 3337 | ||
3485 | slabp = virt_to_slab(objp); | 3338 | page = virt_to_head_page(objp); |
3486 | n = cachep->node[node]; | 3339 | n = cachep->node[node]; |
3487 | list_del(&slabp->list); | 3340 | list_del(&page->lru); |
3488 | check_spinlock_acquired_node(cachep, node); | 3341 | check_spinlock_acquired_node(cachep, node); |
3489 | check_slabp(cachep, slabp); | 3342 | slab_put_obj(cachep, page, objp, node); |
3490 | slab_put_obj(cachep, slabp, objp, node); | ||
3491 | STATS_DEC_ACTIVE(cachep); | 3343 | STATS_DEC_ACTIVE(cachep); |
3492 | n->free_objects++; | 3344 | n->free_objects++; |
3493 | check_slabp(cachep, slabp); | ||
3494 | 3345 | ||
3495 | /* fixup slab chains */ | 3346 | /* fixup slab chains */ |
3496 | if (slabp->inuse == 0) { | 3347 | if (page->active == 0) { |
3497 | if (n->free_objects > n->free_limit) { | 3348 | if (n->free_objects > n->free_limit) { |
3498 | n->free_objects -= cachep->num; | 3349 | n->free_objects -= cachep->num; |
3499 | /* No need to drop any previously held | 3350 | /* No need to drop any previously held |
@@ -3502,16 +3353,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | |||
3502 | * a different cache, refer to comments before | 3353 | * a different cache, refer to comments before |
3503 | * alloc_slabmgmt. | 3354 | * alloc_slabmgmt. |
3504 | */ | 3355 | */ |
3505 | slab_destroy(cachep, slabp); | 3356 | slab_destroy(cachep, page); |
3506 | } else { | 3357 | } else { |
3507 | list_add(&slabp->list, &n->slabs_free); | 3358 | list_add(&page->lru, &n->slabs_free); |
3508 | } | 3359 | } |
3509 | } else { | 3360 | } else { |
3510 | /* Unconditionally move a slab to the end of the | 3361 | /* Unconditionally move a slab to the end of the |
3511 | * partial list on free - maximum time for the | 3362 | * partial list on free - maximum time for the |
3512 | * other objects to be freed, too. | 3363 | * other objects to be freed, too. |
3513 | */ | 3364 | */ |
3514 | list_add_tail(&slabp->list, &n->slabs_partial); | 3365 | list_add_tail(&page->lru, &n->slabs_partial); |
3515 | } | 3366 | } |
3516 | } | 3367 | } |
3517 | } | 3368 | } |
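The free path above keeps the existing policy with the new bookkeeping: a slab that becomes empty (page->active == 0) is destroyed once the node already caches more free objects than its limit, otherwise it is parked on slabs_free, while a still-populated slab is rotated to the tail of slabs_partial. A hedged sketch of that decision follows; it is illustrative only, with hypothetical toy_* names rather than the kernel structures.

#include <stdio.h>

struct toy_node {
	unsigned long free_objects;	/* free objects cached on this node */
	unsigned long free_limit;	/* keep at most this many before reaping */
};

/* What free_block() does with a slab after one of its objects returns. */
static const char *toy_after_free(struct toy_node *n, unsigned int active,
				  unsigned int objs_per_slab)
{
	if (active == 0) {
		if (n->free_objects > n->free_limit) {
			n->free_objects -= objs_per_slab;
			return "destroy slab";
		}
		return "park on slabs_free";
	}
	return "move to tail of slabs_partial";
}

int main(void)
{
	struct toy_node node = { .free_objects = 200, .free_limit = 120 };

	printf("%s\n", toy_after_free(&node, 0, 16));	/* destroy slab */
	printf("%s\n", toy_after_free(&node, 3, 16));	/* tail of slabs_partial */
	return 0;
}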
@@ -3551,10 +3402,10 @@ free_done: | |||
3551 | 3402 | ||
3552 | p = n->slabs_free.next; | 3403 | p = n->slabs_free.next; |
3553 | while (p != &(n->slabs_free)) { | 3404 | while (p != &(n->slabs_free)) { |
3554 | struct slab *slabp; | 3405 | struct page *page; |
3555 | 3406 | ||
3556 | slabp = list_entry(p, struct slab, list); | 3407 | page = list_entry(p, struct page, lru); |
3557 | BUG_ON(slabp->inuse); | 3408 | BUG_ON(page->active); |
3558 | 3409 | ||
3559 | i++; | 3410 | i++; |
3560 | p = p->next; | 3411 | p = p->next; |
@@ -4158,7 +4009,7 @@ out: | |||
4158 | #ifdef CONFIG_SLABINFO | 4009 | #ifdef CONFIG_SLABINFO |
4159 | void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) | 4010 | void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) |
4160 | { | 4011 | { |
4161 | struct slab *slabp; | 4012 | struct page *page; |
4162 | unsigned long active_objs; | 4013 | unsigned long active_objs; |
4163 | unsigned long num_objs; | 4014 | unsigned long num_objs; |
4164 | unsigned long active_slabs = 0; | 4015 | unsigned long active_slabs = 0; |
@@ -4178,23 +4029,23 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) | |||
4178 | check_irq_on(); | 4029 | check_irq_on(); |
4179 | spin_lock_irq(&n->list_lock); | 4030 | spin_lock_irq(&n->list_lock); |
4180 | 4031 | ||
4181 | list_for_each_entry(slabp, &n->slabs_full, list) { | 4032 | list_for_each_entry(page, &n->slabs_full, lru) { |
4182 | if (slabp->inuse != cachep->num && !error) | 4033 | if (page->active != cachep->num && !error) |
4183 | error = "slabs_full accounting error"; | 4034 | error = "slabs_full accounting error"; |
4184 | active_objs += cachep->num; | 4035 | active_objs += cachep->num; |
4185 | active_slabs++; | 4036 | active_slabs++; |
4186 | } | 4037 | } |
4187 | list_for_each_entry(slabp, &n->slabs_partial, list) { | 4038 | list_for_each_entry(page, &n->slabs_partial, lru) { |
4188 | if (slabp->inuse == cachep->num && !error) | 4039 | if (page->active == cachep->num && !error) |
4189 | error = "slabs_partial inuse accounting error"; | 4040 | error = "slabs_partial accounting error"; |
4190 | if (!slabp->inuse && !error) | 4041 | if (!page->active && !error) |
4191 | error = "slabs_partial/inuse accounting error"; | 4042 | error = "slabs_partial accounting error"; |
4192 | active_objs += slabp->inuse; | 4043 | active_objs += page->active; |
4193 | active_slabs++; | 4044 | active_slabs++; |
4194 | } | 4045 | } |
4195 | list_for_each_entry(slabp, &n->slabs_free, list) { | 4046 | list_for_each_entry(page, &n->slabs_free, lru) { |
4196 | if (slabp->inuse && !error) | 4047 | if (page->active && !error) |
4197 | error = "slabs_free/inuse accounting error"; | 4048 | error = "slabs_free accounting error"; |
4198 | num_slabs++; | 4049 | num_slabs++; |
4199 | } | 4050 | } |
4200 | free_objects += n->free_objects; | 4051 | free_objects += n->free_objects; |
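get_slabinfo() now audits the per-node lists purely through page->active, flagging the first inconsistency it sees with the same sticky '!error' pattern as before. The short sketch below models that check for the slabs_partial walk with made-up data; it is not the kernel code.

#include <stdio.h>

int main(void)
{
	unsigned int num = 8;				/* objects per slab */
	unsigned int partial[] = { 3, 8, 0, 5 };	/* 8 and 0 are misplaced */
	const char *error = NULL;
	unsigned long active_objs = 0;

	for (unsigned int i = 0; i < 4; i++) {
		/* a full or empty slab has no business on slabs_partial */
		if (partial[i] == num && !error)
			error = "slabs_partial accounting error";
		if (!partial[i] && !error)
			error = "slabs_partial accounting error";
		active_objs += partial[i];
	}

	printf("active_objs=%lu, error=%s\n", active_objs,
	       error ? error : "none");
	return 0;
}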
@@ -4346,15 +4197,27 @@ static inline int add_caller(unsigned long *n, unsigned long v) | |||
4346 | return 1; | 4197 | return 1; |
4347 | } | 4198 | } |
4348 | 4199 | ||
4349 | static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) | 4200 | static void handle_slab(unsigned long *n, struct kmem_cache *c, |
4201 | struct page *page) | ||
4350 | { | 4202 | { |
4351 | void *p; | 4203 | void *p; |
4352 | int i; | 4204 | int i, j; |
4205 | |||
4353 | if (n[0] == n[1]) | 4206 | if (n[0] == n[1]) |
4354 | return; | 4207 | return; |
4355 | for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) { | 4208 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { |
4356 | if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) | 4209 | bool active = true; |
4210 | |||
4211 | for (j = page->active; j < c->num; j++) { | ||
4212 | /* Skip freed item */ | ||
4213 | if (slab_freelist(page)[j] == i) { | ||
4214 | active = false; | ||
4215 | break; | ||
4216 | } | ||
4217 | } | ||
4218 | if (!active) | ||
4357 | continue; | 4219 | continue; |
4220 | |||
4358 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) | 4221 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) |
4359 | return; | 4222 | return; |
4360 | } | 4223 | } |
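Without slab_bufctl() and BUFCTL_ACTIVE, handle_slab() has to infer liveness from the freelist itself: positions page->active .. c->num-1 hold the indices of the objects that are still free, so an object is counted as allocated only if its index is absent from that tail. A userspace sketch of the same scan follows; the names are hypothetical and the data is invented for illustration.

#include <stdbool.h>
#include <stdio.h>

/*
 * freelist[active..num-1] holds the indices of the free objects, so object
 * 'i' is in use iff its index does not appear in that tail.
 */
static bool toy_object_active(const unsigned int *freelist, unsigned int active,
			      unsigned int num, unsigned int i)
{
	for (unsigned int j = active; j < num; j++)
		if (freelist[j] == i)
			return false;	/* freed item: skip it */
	return true;
}

int main(void)
{
	/* 4-object slab with 2 allocations; indices 3 and 1 are still free. */
	unsigned int freelist[4] = { 0, 2, 3, 1 };
	unsigned int active = 2, num = 4;

	for (unsigned int i = 0; i < num; i++)
		printf("object %u: %s\n", i,
		       toy_object_active(freelist, active, num, i) ?
		       "active" : "free");
	return 0;
}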
@@ -4379,7 +4242,7 @@ static void show_symbol(struct seq_file *m, unsigned long address) | |||
4379 | static int leaks_show(struct seq_file *m, void *p) | 4242 | static int leaks_show(struct seq_file *m, void *p) |
4380 | { | 4243 | { |
4381 | struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); | 4244 | struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); |
4382 | struct slab *slabp; | 4245 | struct page *page; |
4383 | struct kmem_cache_node *n; | 4246 | struct kmem_cache_node *n; |
4384 | const char *name; | 4247 | const char *name; |
4385 | unsigned long *x = m->private; | 4248 | unsigned long *x = m->private; |
@@ -4403,10 +4266,10 @@ static int leaks_show(struct seq_file *m, void *p) | |||
4403 | check_irq_on(); | 4266 | check_irq_on(); |
4404 | spin_lock_irq(&n->list_lock); | 4267 | spin_lock_irq(&n->list_lock); |
4405 | 4268 | ||
4406 | list_for_each_entry(slabp, &n->slabs_full, list) | 4269 | list_for_each_entry(page, &n->slabs_full, lru) |
4407 | handle_slab(x, cachep, slabp); | 4270 | handle_slab(x, cachep, page); |
4408 | list_for_each_entry(slabp, &n->slabs_partial, list) | 4271 | list_for_each_entry(page, &n->slabs_partial, lru) |
4409 | handle_slab(x, cachep, slabp); | 4272 | handle_slab(x, cachep, page); |
4410 | spin_unlock_irq(&n->list_lock); | 4273 | spin_unlock_irq(&n->list_lock); |
4411 | } | 4274 | } |
4412 | name = cachep->name; | 4275 | name = cachep->name; |
@@ -155,7 +155,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s) | |||
155 | /* | 155 | /* |
156 | * Maximum number of desirable partial slabs. | 156 | * Maximum number of desirable partial slabs. |
157 | * The existence of more partial slabs makes kmem_cache_shrink | 157 | * The existence of more partial slabs makes kmem_cache_shrink |
158 | * sort the partial list by the number of objects in the. | 158 | * sort the partial list by the number of objects in use. |
159 | */ | 159 | */ |
160 | #define MAX_PARTIAL 10 | 160 | #define MAX_PARTIAL 10 |
161 | 161 | ||
@@ -933,6 +933,16 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, | |||
933 | * Hooks for other subsystems that check memory allocations. In a typical | 933 | * Hooks for other subsystems that check memory allocations. In a typical |
934 | * production configuration these hooks all should produce no code at all. | 934 | * production configuration these hooks all should produce no code at all. |
935 | */ | 935 | */ |
936 | static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) | ||
937 | { | ||
938 | kmemleak_alloc(ptr, size, 1, flags); | ||
939 | } | ||
940 | |||
941 | static inline void kfree_hook(const void *x) | ||
942 | { | ||
943 | kmemleak_free(x); | ||
944 | } | ||
945 | |||
936 | static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) | 946 | static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) |
937 | { | 947 | { |
938 | flags &= gfp_allowed_mask; | 948 | flags &= gfp_allowed_mask; |
@@ -1217,8 +1227,8 @@ static unsigned long kmem_cache_flags(unsigned long object_size, | |||
1217 | /* | 1227 | /* |
1218 | * Enable debugging if selected on the kernel commandline. | 1228 | * Enable debugging if selected on the kernel commandline. |
1219 | */ | 1229 | */ |
1220 | if (slub_debug && (!slub_debug_slabs || | 1230 | if (slub_debug && (!slub_debug_slabs || (name && |
1221 | !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)))) | 1231 | !strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs))))) |
1222 | flags |= slub_debug; | 1232 | flags |= slub_debug; |
1223 | 1233 | ||
1224 | return flags; | 1234 | return flags; |
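The added '(name && ...)' guard keeps kmem_cache_flags() from handing a NULL cache name to strncmp() when a slub_debug slab filter is set on the command line. The standalone sketch below reproduces the guarded comparison with a toy function; the name and values are illustrative, not the kernel API.

#include <stdio.h>
#include <string.h>

/* Apply 'debug' flags only when no filter is set, or the name matches it. */
static unsigned long toy_debug_flags(const char *debug_slabs, const char *name,
				     unsigned long debug, unsigned long flags)
{
	if (debug && (!debug_slabs ||
		      (name && !strncmp(debug_slabs, name, strlen(debug_slabs)))))
		flags |= debug;
	return flags;
}

int main(void)
{
	/* NULL name no longer crashes; it simply fails the filter. */
	printf("%lx\n", toy_debug_flags("kmalloc", NULL, 0x100, 0));		/* 0 */
	printf("%lx\n", toy_debug_flags("kmalloc", "kmalloc-64", 0x100, 0));	/* 100 */
	return 0;
}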
@@ -1260,13 +1270,30 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, | |||
1260 | static inline void dec_slabs_node(struct kmem_cache *s, int node, | 1270 | static inline void dec_slabs_node(struct kmem_cache *s, int node, |
1261 | int objects) {} | 1271 | int objects) {} |
1262 | 1272 | ||
1273 | static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) | ||
1274 | { | ||
1275 | kmemleak_alloc(ptr, size, 1, flags); | ||
1276 | } | ||
1277 | |||
1278 | static inline void kfree_hook(const void *x) | ||
1279 | { | ||
1280 | kmemleak_free(x); | ||
1281 | } | ||
1282 | |||
1263 | static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) | 1283 | static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) |
1264 | { return 0; } | 1284 | { return 0; } |
1265 | 1285 | ||
1266 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, | 1286 | static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, |
1267 | void *object) {} | 1287 | void *object) |
1288 | { | ||
1289 | kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, | ||
1290 | flags & gfp_allowed_mask); | ||
1291 | } | ||
1268 | 1292 | ||
1269 | static inline void slab_free_hook(struct kmem_cache *s, void *x) {} | 1293 | static inline void slab_free_hook(struct kmem_cache *s, void *x) |
1294 | { | ||
1295 | kmemleak_free_recursive(x, s->flags); | ||
1296 | } | ||
1270 | 1297 | ||
1271 | #endif /* CONFIG_SLUB_DEBUG */ | 1298 | #endif /* CONFIG_SLUB_DEBUG */ |
1272 | 1299 | ||
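The kmemleak calls are now routed through kmalloc_large_node_hook(), kfree_hook(), slab_post_alloc_hook() and slab_free_hook(), and the hooks are defined on both sides of the CONFIG_SLUB_DEBUG #ifdef, so leak tracking keeps working when debug support is compiled out while call sites such as kfree() stay unconditional. The compile-time sketch below shows that pattern with toy macros and hook names; it is not the kernel API.

#include <stdio.h>

#define TOY_SLUB_DEBUG 0	/* flip to 1 to model CONFIG_SLUB_DEBUG=y */

#if TOY_SLUB_DEBUG
/* Debug build: the hook lives next to the rest of the debug machinery. */
static inline void toy_free_hook(const void *x)
{
	printf("debug build: leak tracker notified about %p\n", x);
}
#else
/* Non-debug build: the hook still exists, so leak tracking is preserved. */
static inline void toy_free_hook(const void *x)
{
	printf("non-debug build: leak tracker notified about %p\n", x);
}
#endif

int main(void)
{
	int object = 42;

	toy_free_hook(&object);	/* the call site never needs an #ifdef */
	return 0;
}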
@@ -2829,8 +2856,8 @@ static struct kmem_cache *kmem_cache_node; | |||
2829 | * slab on the node for this slabcache. There are no concurrent accesses | 2856 | * slab on the node for this slabcache. There are no concurrent accesses |
2830 | * possible. | 2857 | * possible. |
2831 | * | 2858 | * |
2832 | * Note that this function only works on the kmalloc_node_cache | 2859 | * Note that this function only works on the kmem_cache_node |
2833 | * when allocating for the kmalloc_node_cache. This is used for bootstrapping | 2860 | * when allocating for the kmem_cache_node. This is used for bootstrapping |
2834 | * memory on a fresh node that has no slab structures yet. | 2861 | * memory on a fresh node that has no slab structures yet. |
2835 | */ | 2862 | */ |
2836 | static void early_kmem_cache_node_alloc(int node) | 2863 | static void early_kmem_cache_node_alloc(int node) |
@@ -3272,7 +3299,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) | |||
3272 | if (page) | 3299 | if (page) |
3273 | ptr = page_address(page); | 3300 | ptr = page_address(page); |
3274 | 3301 | ||
3275 | kmemleak_alloc(ptr, size, 1, flags); | 3302 | kmalloc_large_node_hook(ptr, size, flags); |
3276 | return ptr; | 3303 | return ptr; |
3277 | } | 3304 | } |
3278 | 3305 | ||
@@ -3336,7 +3363,7 @@ void kfree(const void *x) | |||
3336 | page = virt_to_head_page(x); | 3363 | page = virt_to_head_page(x); |
3337 | if (unlikely(!PageSlab(page))) { | 3364 | if (unlikely(!PageSlab(page))) { |
3338 | BUG_ON(!PageCompound(page)); | 3365 | BUG_ON(!PageCompound(page)); |
3339 | kmemleak_free(x); | 3366 | kfree_hook(x); |
3340 | __free_memcg_kmem_pages(page, compound_order(page)); | 3367 | __free_memcg_kmem_pages(page, compound_order(page)); |
3341 | return; | 3368 | return; |
3342 | } | 3369 | } |