author     Joonsoo Kim <iamjoonsoo.kim@lge.com>             2016-05-19 20:10:29 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-05-19 22:12:14 -0400
commit     213b46958c65c7adaaf3201102da16ce0264e9cf
tree       0f1a9a7bf4a6501b7a4e3016758c5b227651c03e /mm/slab.c
parent     76b342bdc71badea2cbac7bf6590aa86e895c507
mm/slab: refill cpu cache through a new slab without holding a node lock
Until now, growing a cache first puts the newly allocated free slab on
the node's slab list, and objects are then allocated from it.  This
necessarily requires holding the node lock, which is heavily contended.
If we instead refill the cpu cache from the new slab before attaching it
to the node's slab list, we can avoid holding the node lock as much as
possible, because the newly allocated slab is visible only to the
current task.  This reduces lock contention.
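
The idea can be pictured with a minimal userspace sketch.  This is not
the kernel code: struct slab, struct node, struct cpu_cache and the two
refill_* functions below are simplified, hypothetical stand-ins used
only to show where the node lock is held in each scheme.

/*
 * Userspace sketch only: simplified stand-in types, not mm/slab.c.
 */
#include <pthread.h>
#include <stdlib.h>

#define OBJS_PER_SLAB	16
#define CPU_CACHE_MAX	8

struct slab {
	struct slab *next;
	int active;			/* objects already handed out */
	void *objs[OBJS_PER_SLAB];
};

struct node {				/* shared state, protected by list_lock */
	pthread_mutex_t list_lock;
	struct slab *slabs;
};

struct cpu_cache {			/* private to the current task/CPU */
	int avail;
	void *entry[CPU_CACHE_MAX];
};

static struct slab *grow_slab(void)
{
	struct slab *s = calloc(1, sizeof(*s));

	if (s)
		for (int i = 0; i < OBJS_PER_SLAB; i++)
			s->objs[i] = malloc(64);	/* pretend objects */
	return s;
}

/* Old scheme: the new slab goes on the node list first, so the whole
 * refill of the cpu cache runs under the contended node lock. */
static void refill_via_list(struct node *n, struct cpu_cache *ac, int batch)
{
	struct slab *s = grow_slab();

	if (!s)
		return;
	pthread_mutex_lock(&n->list_lock);
	s->next = n->slabs;
	n->slabs = s;
	while (s->active < OBJS_PER_SLAB && batch-- && ac->avail < CPU_CACHE_MAX)
		ac->entry[ac->avail++] = s->objs[s->active++];
	pthread_mutex_unlock(&n->list_lock);
}

/* New scheme: refill from the slab while it is still private to this
 * task, then take the lock only to publish it on the node list. */
static void refill_before_list(struct node *n, struct cpu_cache *ac, int batch)
{
	struct slab *s = grow_slab();

	if (!s)
		return;
	while (s->active < OBJS_PER_SLAB && batch-- && ac->avail < CPU_CACHE_MAX)
		ac->entry[ac->avail++] = s->objs[s->active++];	/* no lock held */
	pthread_mutex_lock(&n->list_lock);
	s->next = n->slabs;		/* short critical section */
	n->slabs = s;
	pthread_mutex_unlock(&n->list_lock);
}

In the second variant the lock covers only the list insertion; that is
the effect this patch has on cache_alloc_refill().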
Below are the results of the concurrent allocation/free test from the
slab allocation benchmark Christoph wrote a long time ago; the output
has been simplified.  The numbers are cycle counts for alloc/free
respectively, so lower is better.
* Before
Kmalloc N*alloc N*free(32): Average=355/750
Kmalloc N*alloc N*free(64): Average=452/812
Kmalloc N*alloc N*free(128): Average=559/1070
Kmalloc N*alloc N*free(256): Average=1176/980
Kmalloc N*alloc N*free(512): Average=1939/1189
Kmalloc N*alloc N*free(1024): Average=3521/1278
Kmalloc N*alloc N*free(2048): Average=7152/1838
Kmalloc N*alloc N*free(4096): Average=13438/2013
* After
Kmalloc N*alloc N*free(32): Average=248/966
Kmalloc N*alloc N*free(64): Average=261/949
Kmalloc N*alloc N*free(128): Average=314/1016
Kmalloc N*alloc N*free(256): Average=741/1061
Kmalloc N*alloc N*free(512): Average=1246/1152
Kmalloc N*alloc N*free(1024): Average=2437/1259
Kmalloc N*alloc N*free(2048): Average=4980/1800
Kmalloc N*alloc N*free(4096): Average=9000/2078
The results show that contention is reduced for all object sizes and
that allocation performance improves by 30 ~ 40%.
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/slab.c')
 mm/slab.c | 68 ++++++++++++++++++++++++++++++++++++--------------------------------
 1 file changed, 36 insertions(+), 32 deletions(-)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2865,6 +2865,30 @@ static noinline void *cache_alloc_pfmemalloc(struct kmem_cache *cachep,
 	return obj;
 }
 
+/*
+ * Slab list should be fixed up by fixup_slab_list() for existing slab
+ * or cache_grow_end() for new slab
+ */
+static __always_inline int alloc_block(struct kmem_cache *cachep,
+		struct array_cache *ac, struct page *page, int batchcount)
+{
+	/*
+	 * There must be at least one object available for
+	 * allocation.
+	 */
+	BUG_ON(page->active >= cachep->num);
+
+	while (page->active < cachep->num && batchcount--) {
+		STATS_INC_ALLOCED(cachep);
+		STATS_INC_ACTIVE(cachep);
+		STATS_SET_HIGH(cachep);
+
+		ac->entry[ac->avail++] = slab_get_obj(cachep, page);
+	}
+
+	return batchcount;
+}
+
 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
 {
 	int batchcount;
@@ -2877,7 +2901,6 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
 	check_irq_off();
 	node = numa_mem_id();
 
-retry:
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -2907,21 +2930,7 @@ retry:
 
 		check_spinlock_acquired(cachep);
 
-		/*
-		 * The slab was either on partial or free list so
-		 * there must be at least one object available for
-		 * allocation.
-		 */
-		BUG_ON(page->active >= cachep->num);
-
-		while (page->active < cachep->num && batchcount--) {
-			STATS_INC_ALLOCED(cachep);
-			STATS_INC_ACTIVE(cachep);
-			STATS_SET_HIGH(cachep);
-
-			ac->entry[ac->avail++] = slab_get_obj(cachep, page);
-		}
-
+		batchcount = alloc_block(cachep, ac, page, batchcount);
 		fixup_slab_list(cachep, n, page, &list);
 	}
 
@@ -2941,21 +2950,18 @@ alloc_done:
 		}
 
 		page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
-		cache_grow_end(cachep, page);
 
 		/*
 		 * cache_grow_begin() can reenable interrupts,
 		 * then ac could change.
 		 */
 		ac = cpu_cache_get(cachep);
-		node = numa_mem_id();
+		if (!ac->avail && page)
+			alloc_block(cachep, ac, page, batchcount);
+		cache_grow_end(cachep, page);
 
-		/* no objects in sight? abort */
-		if (!page && ac->avail == 0)
+		if (!ac->avail)
 			return NULL;
-
-		if (!ac->avail) /* objects refilled by interrupt? */
-			goto retry;
 	}
 	ac->touched = 1;
 
@@ -3149,14 +3155,13 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
 {
 	struct page *page;
 	struct kmem_cache_node *n;
-	void *obj;
+	void *obj = NULL;
 	void *list = NULL;
 
 	VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
 	n = get_node(cachep, nodeid);
 	BUG_ON(!n);
 
-retry:
 	check_irq_off();
 	spin_lock(&n->list_lock);
 	page = get_first_slab(n, false);
@@ -3178,19 +3183,18 @@ retry:
 
 	spin_unlock(&n->list_lock);
 	fixup_objfreelist_debug(cachep, &list);
-	goto done;
+	return obj;
 
 must_grow:
 	spin_unlock(&n->list_lock);
 	page = cache_grow_begin(cachep, gfp_exact_node(flags), nodeid);
+	if (page) {
+		/* This slab isn't counted yet so don't update free_objects */
+		obj = slab_get_obj(cachep, page);
+	}
 	cache_grow_end(cachep, page);
-	if (page)
-		goto retry;
 
-	return fallback_alloc(cachep, flags);
-
-done:
-	return obj;
+	return obj ? obj : fallback_alloc(cachep, flags);
 }
 
 static __always_inline void *