path: root/mm/slab.c
Diffstat (limited to 'mm/slab.c')
 -rw-r--r--  mm/slab.c  192
 1 file changed, 174 insertions(+), 18 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 1fcf3ac94b6c..55d84a22ad96 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -124,6 +124,8 @@
 
 #include <trace/events/kmem.h>
 
+#include "internal.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -152,6 +154,12 @@
 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
 #endif
 
+/*
+ * true if a page was allocated from pfmemalloc reserves for network-based
+ * swap
+ */
+static bool pfmemalloc_active __read_mostly;
+
 /* Legal flag mask for kmem_cache_create(). */
 #if DEBUG
 # define CREATE_MASK	(SLAB_RED_ZONE | \
@@ -257,9 +265,30 @@ struct array_cache {
			 * Must have this definition in here for the proper
			 * alignment of array_cache. Also simplifies accessing
			 * the entries.
+			 *
+			 * Entries should not be directly dereferenced as
+			 * entries belonging to slabs marked pfmemalloc will
+			 * have the lower bits set SLAB_OBJ_PFMEMALLOC
			 */
 };
 
+#define SLAB_OBJ_PFMEMALLOC	1
+static inline bool is_obj_pfmemalloc(void *objp)
+{
+	return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
+}
+
+static inline void set_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
+	return;
+}
+
+static inline void clear_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
+}
+
 /*
  * bootstrap: The caches do not work without cpuarrays anymore, but the
  * cpuarrays are allocated from the generic caches...
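The helpers above rely on a standard pointer-tagging trick: slab objects are aligned to at least the word size, so bit 0 of an object pointer is always zero and can carry the "came from a pfmemalloc slab" marker without any extra storage. A minimal, self-contained userspace sketch of the same round trip (illustrative only, not part of the patch; the helper names merely mirror the ones above):

/* Illustrative userspace demo of the low-bit pfmemalloc tagging trick. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define OBJ_PFMEMALLOC 1UL

static bool obj_is_pfmemalloc(void *objp)
{
        return (unsigned long)objp & OBJ_PFMEMALLOC;
}

static void obj_set_pfmemalloc(void **objp)
{
        *objp = (void *)((unsigned long)*objp | OBJ_PFMEMALLOC);
}

static void obj_clear_pfmemalloc(void **objp)
{
        *objp = (void *)((unsigned long)*objp & ~OBJ_PFMEMALLOC);
}

int main(void)
{
        void *obj = malloc(64); /* malloc alignment > 1, so bit 0 is free */
        void *tagged = obj;

        obj_set_pfmemalloc(&tagged);
        assert(obj_is_pfmemalloc(tagged));

        obj_clear_pfmemalloc(&tagged);
        assert(tagged == obj);  /* untagging recovers the original pointer */

        printf("tagging round-trip ok\n");
        free(obj);
        return 0;
}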
@@ -900,6 +929,102 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	return nc;
 }
 
+static inline bool is_slab_pfmemalloc(struct slab *slabp)
+{
+	struct page *page = virt_to_page(slabp->s_mem);
+
+	return PageSlabPfmemalloc(page);
+}
+
+/* Clears pfmemalloc_active if no slabs have pfmalloc set */
+static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
+						struct array_cache *ac)
+{
+	struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()];
+	struct slab *slabp;
+	unsigned long flags;
+
+	if (!pfmemalloc_active)
+		return;
+
+	spin_lock_irqsave(&l3->list_lock, flags);
+	list_for_each_entry(slabp, &l3->slabs_full, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_partial, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_free, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	pfmemalloc_active = false;
+out:
+	spin_unlock_irqrestore(&l3->list_lock, flags);
+}
+
+static void *ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
+						gfp_t flags, bool force_refill)
+{
+	int i;
+	void *objp = ac->entry[--ac->avail];
+
+	/* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
+	if (unlikely(is_obj_pfmemalloc(objp))) {
+		struct kmem_list3 *l3;
+
+		if (gfp_pfmemalloc_allowed(flags)) {
+			clear_obj_pfmemalloc(&objp);
+			return objp;
+		}
+
+		/* The caller cannot use PFMEMALLOC objects, find another one */
+		for (i = 1; i < ac->avail; i++) {
+			/* If a !PFMEMALLOC object is found, swap them */
+			if (!is_obj_pfmemalloc(ac->entry[i])) {
+				objp = ac->entry[i];
+				ac->entry[i] = ac->entry[ac->avail];
+				ac->entry[ac->avail] = objp;
+				return objp;
+			}
+		}
+
+		/*
+		 * If there are empty slabs on the slabs_free list and we are
+		 * being forced to refill the cache, mark this one !pfmemalloc.
+		 */
+		l3 = cachep->nodelists[numa_mem_id()];
+		if (!list_empty(&l3->slabs_free) && force_refill) {
+			struct slab *slabp = virt_to_slab(objp);
+			ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+			clear_obj_pfmemalloc(&objp);
+			recheck_pfmemalloc_active(cachep, ac);
+			return objp;
+		}
+
+		/* No !PFMEMALLOC objects available */
+		ac->avail++;
+		objp = NULL;
+	}
+
+	return objp;
+}
+
+static void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+								void *objp)
+{
+	if (unlikely(pfmemalloc_active)) {
+		/* Some pfmemalloc slabs exist, check if this is one */
+		struct page *page = virt_to_page(objp);
+		if (PageSlabPfmemalloc(page))
+			set_obj_pfmemalloc(&objp);
+	}
+
+	ac->entry[ac->avail++] = objp;
+}
+
 /*
  * Transfer objects in one arraycache to another.
  * Locking must be handled by the caller.
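The ac_get_obj()/ac_put_obj() pair added above enforces one policy: a caller that is not entitled to pfmemalloc reserves must never be handed a tagged object. Below is a condensed userspace model of the selection logic in ac_get_obj(), illustrative only and not the kernel code: a plain pointer array stands in for the per-CPU array_cache, the allowed flag stands in for gfp_pfmemalloc_allowed(), and the force_refill escape hatch is omitted.

/* Illustrative model of the ac_get_obj() selection policy (not kernel code). */
#include <stdbool.h>
#include <stddef.h>

#define OBJ_PFMEMALLOC 1UL

struct toy_cache {
        void *entry[16];
        int avail;
};

static bool tagged(void *objp)
{
        return (unsigned long)objp & OBJ_PFMEMALLOC;
}

static void *untag(void *objp)
{
        return (void *)((unsigned long)objp & ~OBJ_PFMEMALLOC);
}

/*
 * Pop the most recently freed object. If it is tagged pfmemalloc and the
 * caller may not dip into reserves, scan the rest of the array for an
 * untagged object, keep the tagged one cached, and hand out the clean one.
 */
static void *toy_get_obj(struct toy_cache *ac, bool allowed)
{
        void *objp = ac->entry[--ac->avail];
        int i;

        if (!tagged(objp) || allowed)
                return untag(objp);

        for (i = 0; i < ac->avail; i++) {
                if (!tagged(ac->entry[i])) {
                        void *found = ac->entry[i];

                        ac->entry[i] = objp;    /* tagged object stays cached */
                        return found;
                }
        }

        ac->avail++;    /* nothing usable: put the tagged object back and fail */
        return NULL;
}

int main(void)
{
        static long a, b;       /* long alignment keeps bit 0 clear */
        struct toy_cache ac = { .avail = 2 };

        ac.entry[0] = &b;                                       /* ordinary object */
        ac.entry[1] = (void *)((unsigned long)&a | OBJ_PFMEMALLOC); /* pfmemalloc object */

        /* A caller without reserve rights receives &b, not the tagged object. */
        return toy_get_obj(&ac, false) == &b ? 0 : 1;
}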
@@ -1076,7 +1201,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 			STATS_INC_ACOVERFLOW(cachep);
 			__drain_alien_cache(cachep, alien, nodeid);
 		}
-		alien->entry[alien->avail++] = objp;
+		ac_put_obj(cachep, alien, objp);
 		spin_unlock(&alien->lock);
 	} else {
 		spin_lock(&(cachep->nodelists[nodeid])->list_lock);
@@ -1759,6 +1884,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		return NULL;
 	}
 
+	/* Record if ALLOC_PFMEMALLOC was set when allocating the slab */
+	if (unlikely(page->pfmemalloc))
+		pfmemalloc_active = true;
+
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -1766,9 +1895,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	else
 		add_zone_page_state(page_zone(page),
 			NR_SLAB_UNRECLAIMABLE, nr_pages);
-	for (i = 0; i < nr_pages; i++)
+	for (i = 0; i < nr_pages; i++) {
 		__SetPageSlab(page + i);
 
+		if (page->pfmemalloc)
+			SetPageSlabPfmemalloc(page + i);
+	}
+
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
 
@@ -1800,6 +1933,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 			NR_SLAB_UNRECLAIMABLE, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
+		__ClearPageSlabPfmemalloc(page);
 		__ClearPageSlab(page);
 		page++;
 	}
@@ -3015,16 +3149,19 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
+							bool force_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
 	struct array_cache *ac;
 	int node;
 
-retry:
 	check_irq_off();
 	node = numa_mem_id();
+	if (unlikely(force_refill))
+		goto force_grow;
+retry:
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3074,8 +3211,8 @@ retry:
 			STATS_INC_ACTIVE(cachep);
 			STATS_SET_HIGH(cachep);
 
-			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
-							    node);
+			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+									node));
 		}
 		check_slabp(cachep, slabp);
 
@@ -3094,18 +3231,22 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || force_refill))
 			return NULL;
 
 		if (!ac->avail)		/* objects refilled by interrupt? */
 			goto retry;
 	}
 	ac->touched = 1;
-	return ac->entry[--ac->avail];
+
+	return ac_get_obj(cachep, ac, flags, force_refill);
 }
 
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
@@ -3187,23 +3328,35 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
 	struct array_cache *ac;
+	bool force_refill = false;
 
 	check_irq_off();
 
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
-		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
-		objp = ac->entry[--ac->avail];
-	} else {
-		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = ac_get_obj(cachep, ac, flags, false);
+
 		/*
-		 * the 'ac' may be updated by cache_alloc_refill(),
-		 * and kmemleak_erase() requires its correct value.
+		 * Allow for the possibility all avail objects are not allowed
+		 * by the current flags
 		 */
-		ac = cpu_cache_get(cachep);
+		if (objp) {
+			STATS_INC_ALLOCHIT(cachep);
+			goto out;
+		}
+		force_refill = true;
 	}
+
+	STATS_INC_ALLOCMISS(cachep);
+	objp = cache_alloc_refill(cachep, flags, force_refill);
+	/*
+	 * the 'ac' may be updated by cache_alloc_refill(),
+	 * and kmemleak_erase() requires its correct value.
+	 */
+	ac = cpu_cache_get(cachep);
+
+out:
 	/*
 	 * To avoid a false negative, if an object that is in one of the
 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
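The restructuring of ____cache_alloc() above turns "hit, but every cached object is pfmemalloc-tagged and off-limits" into a miss with force_refill set, so cache_alloc_refill() jumps straight to cache_grow() instead of rescanning the same per-CPU array. A compressed control-flow sketch of that decision (illustrative only, not kernel code; toy_get_obj/toy_refill are hypothetical stand-ins for ac_get_obj() and cache_alloc_refill()):

/* Illustrative control-flow model of the hit/miss/force_refill decision. */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static long cached_obj, fresh_obj;              /* dummy objects */
static int avail = 1;                           /* one object sits in the array cache ... */
static bool cached_is_pfmemalloc = true;        /* ... but it came from a pfmemalloc slab */

/* Stand-in for ac_get_obj(): fails if the only cached object is off-limits. */
static void *toy_get_obj(bool allowed)
{
        if (cached_is_pfmemalloc && !allowed)
                return NULL;
        avail--;
        return &cached_obj;
}

/* Stand-in for cache_alloc_refill(): force_refill skips straight to growing the cache. */
static void *toy_refill(bool force_refill)
{
        printf("refill, force_refill=%d\n", force_refill);
        return &fresh_obj;
}

static void *toy_alloc(bool allowed)
{
        bool force_refill = false;
        void *objp;

        if (avail) {
                objp = toy_get_obj(allowed);
                if (objp)
                        return objp;            /* genuine hit */
                force_refill = true;            /* hit, but nothing the caller may use */
        }
        return toy_refill(force_refill);        /* miss, or forced refill */
}

int main(void)
{
        /* A !pfmemalloc allocation sees only a tagged object and forces a refill. */
        return toy_alloc(false) == &fresh_obj ? 0 : 1;
}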
@@ -3525,9 +3678,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 	struct kmem_list3 *l3;
 
 	for (i = 0; i < nr_objects; i++) {
-		void *objp = objpp[i];
+		void *objp;
 		struct slab *slabp;
 
+		clear_obj_pfmemalloc(&objpp[i]);
+		objp = objpp[i];
+
 		slabp = virt_to_slab(objp);
 		l3 = cachep->nodelists[node];
 		list_del(&slabp->list);
@@ -3645,7 +3801,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
 		cache_flusharray(cachep, ac);
 	}
 
-	ac->entry[ac->avail++] = objp;
+	ac_put_obj(cachep, ac, objp);
 }
 
 /**