Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	155
1 files changed, 65 insertions, 90 deletions
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -115,6 +115,7 @@
 #include <linux/debugobjects.h>
 #include <linux/kmemcheck.h>
 #include <linux/memory.h>
+#include <linux/prefetch.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -191,22 +192,6 @@ typedef unsigned int kmem_bufctl_t;
 #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
 
 /*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
- */
-struct slab {
-	struct list_head list;
-	unsigned long colouroff;
-	void *s_mem;		/* including colour offset */
-	unsigned int inuse;	/* num of objs active in slab */
-	kmem_bufctl_t free;
-	unsigned short nodeid;
-};
-
-/*
  * struct slab_rcu
  *
  * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
@@ -219,8 +204,6 @@ struct slab {
  *
  * rcu_read_lock before reading the address, then rcu_read_unlock after
  * taking the spinlock within the structure expected at that address.
- *
- * We assume struct slab_rcu can overlay struct slab when destroying.
  */
 struct slab_rcu {
 	struct rcu_head head;
@@ -229,6 +212,27 @@ struct slab_rcu {
 };
 
 /*
+ * struct slab
+ *
+ * Manages the objs in a slab. Placed either at the beginning of mem allocated
+ * for a slab, or allocated from an general cache.
+ * Slabs are chained into three list: fully used, partial, fully free slabs.
+ */
+struct slab {
+	union {
+		struct {
+			struct list_head list;
+			unsigned long colouroff;
+			void *s_mem;		/* including colour offset */
+			unsigned int inuse;	/* num of objs active in slab */
+			kmem_bufctl_t free;
+			unsigned short nodeid;
+		};
+		struct slab_rcu __slab_cover_slab_rcu;
+	};
+};
+
+/*
  * struct array_cache
  *
  * Purpose:
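The union introduced above turns the deleted comment's assumption ("struct slab_rcu can overlay struct slab") into something the compiler enforces: the slab management fields and the RCU bookkeeping share storage, so struct slab can never be smaller than struct slab_rcu. A standalone C sketch of the idea, using simplified stand-ins for the kernel types rather than the real definitions:

#include <stdio.h>

/* Simplified stand-ins for the kernel types; not the mm/slab.c definitions. */
struct list_head { struct list_head *next, *prev; };
struct rcu_head  { struct rcu_head *next; void (*func)(struct rcu_head *); };
struct kmem_cache;			/* opaque in this sketch */
typedef unsigned int kmem_bufctl_t;

struct slab_rcu {
	struct rcu_head head;
	struct kmem_cache *cachep;
	void *addr;
};

/* The union guarantees the overlay instead of merely documenting it. */
struct slab {
	union {
		struct {
			struct list_head list;
			unsigned long colouroff;
			void *s_mem;
			unsigned int inuse;
			kmem_bufctl_t free;
			unsigned short nodeid;
		};
		struct slab_rcu __slab_cover_slab_rcu;
	};
};

int main(void)
{
	/* sizeof(struct slab) can no longer drop below sizeof(struct slab_rcu). */
	printf("sizeof(struct slab)     = %zu\n", sizeof(struct slab));
	printf("sizeof(struct slab_rcu) = %zu\n", sizeof(struct slab_rcu));
	return 0;
}

(Anonymous structs inside a union are a C11/GNU extension, which is what the kernel relies on here; build the sketch with gcc.)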
@@ -284,7 +288,7 @@ struct kmem_list3 {
  * Need this for bootstrapping a per node allocator.
  */
 #define NUM_INIT_LISTS (3 * MAX_NUMNODES)
-struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
+static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
 #define CACHE_CACHE 0
 #define SIZE_AC MAX_NUMNODES
 #define SIZE_L3 (2 * MAX_NUMNODES)
@@ -829,12 +833,12 @@ static void init_reap_node(int cpu)
 
 static void next_reap_node(void)
 {
-	int node = __get_cpu_var(slab_reap_node);
+	int node = __this_cpu_read(slab_reap_node);
 
 	node = next_node(node, node_online_map);
 	if (unlikely(node >= MAX_NUMNODES))
 		node = first_node(node_online_map);
-	__get_cpu_var(slab_reap_node) = node;
+	__this_cpu_write(slab_reap_node, node);
 }
 
 #else
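This hunk and the reap_alien() hunk further down replace __get_cpu_var(), which first computes the address of the current CPU's copy and then dereferences it, with __this_cpu_read()/__this_cpu_write(), which act on the per-CPU variable directly. A kernel-style sketch of the same pattern; demo_counter is a hypothetical variable and the snippet assumes a kernel build tree rather than being buildable on its own:

#include <linux/percpu.h>

/* Hypothetical per-CPU variable standing in for slab_reap_node. */
static DEFINE_PER_CPU(int, demo_counter);

static void demo_advance(int limit)
{
	/* Read and write this CPU's copy without materialising its address. */
	int v = __this_cpu_read(demo_counter);

	if (++v >= limit)
		v = 0;
	__this_cpu_write(demo_counter, v);
}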
@@ -875,7 +879,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	nc = kmalloc_node(memsize, gfp, node);
 	/*
 	 * The array_cache structures contain pointers to free object.
-	 * However, when such objects are allocated or transfered to another
+	 * However, when such objects are allocated or transferred to another
 	 * cache the pointers are not cleared and they could be counted as
 	 * valid references during a kmemleak scan. Therefore, kmemleak must
 	 * not scan such objects.
@@ -901,7 +905,7 @@ static int transfer_objects(struct array_cache *to,
 		struct array_cache *from, unsigned int max)
 {
 	/* Figure out how many entries to transfer */
-	int nr = min(min(from->avail, max), to->limit - to->avail);
+	int nr = min3(from->avail, max, to->limit - to->avail);
 
 	if (!nr)
 		return 0;
@@ -1012,7 +1016,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
  */
 static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
 {
-	int node = __get_cpu_var(slab_reap_node);
+	int node = __this_cpu_read(slab_reap_node);
 
 	if (l3->alien) {
 		struct array_cache *ac = l3->alien[node];
@@ -1293,7 +1297,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 		 * anything expensive but will only modify reap_work
 		 * and reschedule the timer.
 		 */
-		cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
+		cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
 		/* Now the cache_reaper is guaranteed to be not running. */
 		per_cpu(slab_reap_work, cpu).work.func = NULL;
 		break;
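cancel_rearming_delayed_work() was a deprecated wrapper; cancel_delayed_work_sync() is the current name for "cancel the pending work and wait for a running instance to finish, even if the handler re-arms itself", which is exactly what the CPU-down path needs before clearing the work function. A kernel-style sketch of that lifecycle with a hypothetical work item (assumes a kernel build tree):

#include <linux/workqueue.h>

static void demo_reap(struct work_struct *w);	/* hypothetical handler */
static DECLARE_DELAYED_WORK(demo_reap_work, demo_reap);

static void demo_reap(struct work_struct *w)
{
	/* ... periodic housekeeping ... */
	schedule_delayed_work(&demo_reap_work, HZ);	/* re-arms itself */
}

static void demo_start(void)
{
	schedule_delayed_work(&demo_reap_work, HZ);
}

static void demo_stop(void)
{
	/* Cancels the timer and waits out a running, self-re-arming handler. */
	cancel_delayed_work_sync(&demo_reap_work);
}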
@@ -1387,7 +1391,7 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
 		break;
 	}
 out:
-	return ret ? notifier_from_errno(ret) : NOTIFY_OK;
+	return notifier_from_errno(ret);
 }
 #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
 
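The dropped ternary was redundant: notifier_from_errno(0) already evaluates to NOTIFY_OK, so only a non-zero errno changes the outcome. A userspace re-implementation that mirrors the helper's logic (the constants and encoding below are schematic, not copied from include/linux/notifier.h):

#include <stdio.h>

#define NOTIFY_OK		0x0001
#define NOTIFY_STOP_MASK	0x8000
#define NOTIFY_BAD		(NOTIFY_STOP_MASK | 0x0002)

static int notifier_from_errno_demo(int err)
{
	if (err)
		return NOTIFY_STOP_MASK | (NOTIFY_BAD - err);
	return NOTIFY_OK;	/* err == 0 already maps to NOTIFY_OK */
}

int main(void)
{
	/* Both arms of "ret ? notifier_from_errno(ret) : NOTIFY_OK" agree at 0. */
	printf("err = 0   -> %#x\n", notifier_from_errno_demo(0));
	printf("err = -12 -> %#x\n", notifier_from_errno_demo(-12));	/* -ENOMEM */
	return 0;
}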
@@ -2147,8 +2151,6 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  *
  * @name must be valid until the cache is destroyed. This implies that
  * the module calling this has to destroy the cache before getting unloaded.
- * Note that kmem_cache_name() is not guaranteed to return the same pointer,
- * therefore applications must manage it themselves.
  *
  * The flags are
  *
@@ -2288,8 +2290,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (ralign < align) {
 		ralign = align;
 	}
-	/* disable debug if not aligning with REDZONE_ALIGN */
-	if (ralign & (__alignof__(unsigned long long) - 1))
+	/* disable debug if necessary */
+	if (ralign > __alignof__(unsigned long long))
 		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	/*
 	 * 4) Store it.
@@ -2315,8 +2317,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 */
 	if (flags & SLAB_RED_ZONE) {
 		/* add space for red zone words */
-		cachep->obj_offset += align;
-		size += align + sizeof(unsigned long long);
+		cachep->obj_offset += sizeof(unsigned long long);
+		size += 2 * sizeof(unsigned long long);
 	}
 	if (flags & SLAB_STORE_USER) {
 		/* user store requires one word storage behind the end of
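With SLAB_RED_ZONE set, one guard word is placed before the object and one after it, so the object offset grows by a single word and the managed size by two; basing both on sizeof(unsigned long long) rather than the (possibly larger) cache alignment keeps the debug overhead fixed. A schematic sketch of that layout arithmetic (illustrative only, not the kmem_cache_create() code):

#include <stddef.h>
#include <stdio.h>

int main(void)
{
	size_t obj_size   = 40;					/* requested object size */
	size_t word       = sizeof(unsigned long long);	/* one red-zone word */
	size_t obj_offset = 0;
	size_t size       = obj_size;

	/* SLAB_RED_ZONE: a guard word on each side of the object. */
	obj_offset += word;
	size += 2 * word;

	printf("red zone 1 at %zu, object at %zu..%zu, red zone 2 at %zu\n",
	       (size_t)0, obj_offset, obj_offset + obj_size,
	       obj_offset + obj_size);
	printf("managed size per object: %zu bytes\n", size);
	return 0;
}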
@@ -2605,7 +2607,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  *
  * The cache must be empty before calling this function.
  *
- * The caller must guarantee that noone will allocate memory from the cache
+ * The caller must guarantee that no one will allocate memory from the cache
  * during the kmem_cache_destroy().
  */
 void kmem_cache_destroy(struct kmem_cache *cachep)
@@ -2781,7 +2783,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
 /*
  * Map pages beginning at addr to the given cache and slab. This is required
  * for the slab allocator to be able to lookup the cache and slab of a
- * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
+ * virtual address for kfree, ksize, and slab debugging.
  */
 static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
 			   void *addr)
@@ -3602,13 +3604,14 @@ free_done:
  * Release an obj back to its cache. If the obj has a constructed state, it must
  * be in this state _before_ it is released. Called with disabled ints.
  */
-static inline void __cache_free(struct kmem_cache *cachep, void *objp)
+static inline void __cache_free(struct kmem_cache *cachep, void *objp,
+    void *caller)
 {
 	struct array_cache *ac = cpu_cache_get(cachep);
 
 	check_irq_off();
 	kmemleak_free_recursive(objp, cachep->flags);
-	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
+	objp = cache_free_debugcheck(cachep, objp, caller);
 
 	kmemcheck_slab_free(cachep, objp, obj_size(cachep));
 
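__cache_free() now receives the caller address from its exported entry points (see the kmem_cache_free() and kfree() hunks further down) instead of calling __builtin_return_address(0) itself; captured at the exported functions, the recorded address is the real API user regardless of whether the helper ends up inlined. A userspace sketch of the pattern, with a hypothetical debug hook standing in for cache_free_debugcheck():

#include <stdio.h>

static void debug_note_free(void *obj, void *caller)
{
	printf("object %p freed from %p\n", obj, caller);
}

/* The shared helper just records whatever caller it is handed... */
static inline void cache_free(void *obj, void *caller)
{
	debug_note_free(obj, caller);
	/* ... the actual free work would follow here ... */
}

/* ...so each public entry point captures its own return address once. */
void api_free(void *obj)
{
	cache_free(obj, __builtin_return_address(0));
}

int main(void)
{
	int x;

	api_free(&x);	/* the recorded caller is main(), not the helper */
	return 0;
}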
@@ -3653,42 +3656,19 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 EXPORT_SYMBOL(kmem_cache_alloc);
 
 #ifdef CONFIG_TRACING
-void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+void *
+kmem_cache_alloc_trace(size_t size, struct kmem_cache *cachep, gfp_t flags)
 {
-	return __cache_alloc(cachep, flags, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(kmem_cache_alloc_notrace);
-#endif
+	void *ret;
 
-/**
- * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
- * @cachep: the cache we're checking against
- * @ptr: pointer to validate
- *
- * This verifies that the untrusted pointer looks sane;
- * it is _not_ a guarantee that the pointer is actually
- * part of the slab cache in question, but it at least
- * validates that the pointer can be dereferenced and
- * looks half-way sane.
- *
- * Currently only used for dentry validation.
- */
-int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
-{
-	unsigned long size = cachep->buffer_size;
-	struct page *page;
+	ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
 
-	if (unlikely(!kern_ptr_validate(ptr, size)))
-		goto out;
-	page = virt_to_page(ptr);
-	if (unlikely(!PageSlab(page)))
-		goto out;
-	if (unlikely(page_get_cache(page) != cachep))
-		goto out;
-	return 1;
-out:
-	return 0;
+	trace_kmalloc(_RET_IP_, ret,
+		      size, slab_buffer_size(cachep), flags);
+	return ret;
 }
+EXPORT_SYMBOL(kmem_cache_alloc_trace);
+#endif
 
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
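kmem_cache_alloc_notrace() becomes kmem_cache_alloc_trace(): the wrapper now takes the requested size and fires the kmalloc tracepoint itself, so size-aware callers (see the __do_kmalloc_node() hunk below) stop open-coding the tracing. A userspace sketch of the wrapper-owns-the-tracepoint pattern, with hypothetical names and printf() standing in for trace_kmalloc():

#include <stdio.h>
#include <stdlib.h>

static void trace_alloc(const char *site, void *ret, size_t req, size_t got)
{
	printf("%s: ret=%p requested=%zu allocated=%zu\n", site, ret, req, got);
}

/* Allocate, emit the event, hand back the object: one place, every caller. */
static void *cache_alloc_trace(const char *site, size_t req, size_t bucket)
{
	void *ret = malloc(bucket);	/* stand-in for __cache_alloc() */

	trace_alloc(site, ret, req, bucket);
	return ret;
}

int main(void)
{
	void *p = cache_alloc_trace("demo_kmalloc", 20, 32);

	free(p);
	return 0;
}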
@@ -3705,31 +3685,32 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
 #ifdef CONFIG_TRACING
-void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
-				    gfp_t flags,
-				    int nodeid)
+void *kmem_cache_alloc_node_trace(size_t size,
+				  struct kmem_cache *cachep,
+				  gfp_t flags,
+				  int nodeid)
 {
-	return __cache_alloc_node(cachep, flags, nodeid,
+	void *ret;
+
+	ret = __cache_alloc_node(cachep, flags, nodeid,
 				  __builtin_return_address(0));
+	trace_kmalloc_node(_RET_IP_, ret,
+			   size, slab_buffer_size(cachep),
+			   flags, nodeid);
+	return ret;
 }
-EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
 #endif
 
 static __always_inline void *
 __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 {
 	struct kmem_cache *cachep;
-	void *ret;
 
 	cachep = kmem_find_general_cachep(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
-	ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
-
-	trace_kmalloc_node((unsigned long) caller, ret,
-			   size, cachep->buffer_size, flags, node);
-
-	return ret;
+	return kmem_cache_alloc_node_trace(size, cachep, flags, node);
 }
 
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
@@ -3821,7 +3802,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 	debug_check_no_locks_freed(objp, obj_size(cachep));
 	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
 		debug_check_no_obj_freed(objp, obj_size(cachep));
-	__cache_free(cachep, objp);
+	__cache_free(cachep, objp, __builtin_return_address(0));
 	local_irq_restore(flags);
 
 	trace_kmem_cache_free(_RET_IP_, objp);
@@ -3851,7 +3832,7 @@ void kfree(const void *objp)
 	c = virt_to_cache(objp);
 	debug_check_no_locks_freed(objp, obj_size(c));
 	debug_check_no_obj_freed(objp, obj_size(c));
-	__cache_free(c, (void *)objp);
+	__cache_free(c, (void *)objp, __builtin_return_address(0));
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kfree);
@@ -3862,12 +3843,6 @@ unsigned int kmem_cache_size(struct kmem_cache *cachep)
 }
 EXPORT_SYMBOL(kmem_cache_size);
 
-const char *kmem_cache_name(struct kmem_cache *cachep)
-{
-	return cachep->name;
-}
-EXPORT_SYMBOL_GPL(kmem_cache_name);
-
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
@@ -4075,7 +4050,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
  * necessary. Note that the l3 listlock also protects the array_cache
  * if drain_array() is used on the shared array.
  */
-void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
 			 struct array_cache *ac, int force, int node)
 {
 	int tofree;
@@ -4339,7 +4314,7 @@ static const struct seq_operations slabinfo_op = {
  * @count: data length
  * @ppos: unused
  */
-ssize_t slabinfo_write(struct file *file, const char __user * buffer,
+static ssize_t slabinfo_write(struct file *file, const char __user *buffer,
 		       size_t count, loff_t *ppos)
 {
 	char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;