-rw-r--r--  include/linux/mm_types.h   |   9
-rw-r--r--  include/linux/page-flags.h |  29
-rw-r--r--  mm/internal.h              |   3
-rw-r--r--  mm/page_alloc.c            |  27
-rw-r--r--  mm/slab.c                  | 192
-rw-r--r--  mm/slub.c                  |  29
6 files changed, 264 insertions(+), 25 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 074eb98fe15..375e79eb009 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -54,6 +54,15 @@ struct page {
 	union {
 		pgoff_t index;		/* Our offset within mapping. */
 		void *freelist;		/* slub/slob first free object */
+		bool pfmemalloc;	/* If set by the page allocator,
+					 * ALLOC_PFMEMALLOC was set
+					 * and the low watermark was not
+					 * met, implying that the system
+					 * is under some pressure. The
+					 * caller should try to ensure
+					 * this page is only used to
+					 * free other pages.
+					 */
 	};
 
 	union {
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c88d2a9451a..b5d13841604 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -7,6 +7,7 @@
 
 #include <linux/types.h>
 #include <linux/bug.h>
+#include <linux/mmdebug.h>
 #ifndef __GENERATING_BOUNDS_H
 #include <linux/mm_types.h>
 #include <generated/bounds.h>
@@ -453,6 +454,34 @@ static inline int PageTransTail(struct page *page)
 }
 #endif
 
+/*
+ * If network-based swap is enabled, sl*b must keep track of whether pages
+ * were allocated from pfmemalloc reserves.
+ */
+static inline int PageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	return PageActive(page);
+}
+
+static inline void SetPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	SetPageActive(page);
+}
+
+static inline void __ClearPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	__ClearPageActive(page);
+}
+
+static inline void ClearPageSlabPfmemalloc(struct page *page)
+{
+	VM_BUG_ON(!PageSlab(page));
+	ClearPageActive(page);
+}
+
 #ifdef CONFIG_MMU
 #define __PG_MLOCKED		(1 << PG_mlocked)
 #else
diff --git a/mm/internal.h b/mm/internal.h
index 3314f79d775..eb76b67890d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -279,6 +279,9 @@ static inline struct page *mem_map_next(struct page *iter,
 #define __paginginit __init
 #endif
 
+/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARKS */
+bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
+
 /* Memory initialisation debug and verification */
 enum mminit_level {
 	MMINIT_WARNING,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6a29ed8e6e6..38e5be65f24 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1513,6 +1513,7 @@ failed:
 #define ALLOC_HARDER		0x10 /* try to alloc harder */
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
+#define ALLOC_PFMEMALLOC	0x80 /* Caller has PF_MEMALLOC set */
 
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
@@ -2293,16 +2294,22 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 	} else if (unlikely(rt_task(current)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
-	if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
-		if (!in_interrupt() &&
-		    ((current->flags & PF_MEMALLOC) ||
-		     unlikely(test_thread_flag(TIF_MEMDIE))))
+	if ((current->flags & PF_MEMALLOC) ||
+			unlikely(test_thread_flag(TIF_MEMDIE))) {
+		alloc_flags |= ALLOC_PFMEMALLOC;
+
+		if (likely(!(gfp_mask & __GFP_NOMEMALLOC)) && !in_interrupt())
 			alloc_flags |= ALLOC_NO_WATERMARKS;
 	}
 
 	return alloc_flags;
 }
 
+bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
+{
+	return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_PFMEMALLOC);
+}
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2490,10 +2497,18 @@ nopage:
 	warn_alloc_failed(gfp_mask, order, NULL);
 	return page;
 got_pg:
+	/*
+	 * page->pfmemalloc is set when the caller had PF_MEMALLOC set or has
+	 * been OOM killed. The expectation is that the caller is taking
+	 * steps that will free more memory. The caller should avoid the
+	 * page being used for !PFMEMALLOC purposes.
+	 */
+	page->pfmemalloc = !!(alloc_flags & ALLOC_PFMEMALLOC);
+
 	if (kmemcheck_enabled)
 		kmemcheck_pagealloc_alloc(page, order, gfp_mask);
-	return page;
 
+	return page;
 }
 
 /*
@@ -2544,6 +2559,8 @@ retry_cpuset:
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,
 				preferred_zone, migratetype);
+	else
+		page->pfmemalloc = false;
 
 	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 
diff --git a/mm/slab.c b/mm/slab.c
index 1fcf3ac94b6..55d84a22ad9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -124,6 +124,8 @@
 
 #include <trace/events/kmem.h>
 
+#include "internal.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
@@ -152,6 +154,12 @@
 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
 #endif
 
+/*
+ * true if a page was allocated from pfmemalloc reserves for network-based
+ * swap
+ */
+static bool pfmemalloc_active __read_mostly;
+
 /* Legal flag mask for kmem_cache_create(). */
 #if DEBUG
 # define CREATE_MASK	(SLAB_RED_ZONE | \
@@ -257,9 +265,30 @@ struct array_cache {
 			 * Must have this definition in here for the proper
 			 * alignment of array_cache. Also simplifies accessing
 			 * the entries.
+			 *
+			 * Entries should not be directly dereferenced as
+			 * entries belonging to slabs marked pfmemalloc will
+			 * have the lower bits set SLAB_OBJ_PFMEMALLOC
 			 */
 };
 
+#define SLAB_OBJ_PFMEMALLOC	1
+static inline bool is_obj_pfmemalloc(void *objp)
+{
+	return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
+}
+
+static inline void set_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
+	return;
+}
+
+static inline void clear_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
+}
+
 /*
  * bootstrap: The caches do not work without cpuarrays anymore, but the
  * cpuarrays are allocated from the generic caches...
@@ -900,6 +929,102 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	return nc;
 }
 
+static inline bool is_slab_pfmemalloc(struct slab *slabp)
+{
+	struct page *page = virt_to_page(slabp->s_mem);
+
+	return PageSlabPfmemalloc(page);
+}
+
+/* Clears pfmemalloc_active if no slabs have pfmemalloc set */
+static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
+						struct array_cache *ac)
+{
+	struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()];
+	struct slab *slabp;
+	unsigned long flags;
+
+	if (!pfmemalloc_active)
+		return;
+
+	spin_lock_irqsave(&l3->list_lock, flags);
+	list_for_each_entry(slabp, &l3->slabs_full, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_partial, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_free, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	pfmemalloc_active = false;
+out:
+	spin_unlock_irqrestore(&l3->list_lock, flags);
+}
+
+static void *ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
+						gfp_t flags, bool force_refill)
+{
+	int i;
+	void *objp = ac->entry[--ac->avail];
+
+	/* Ensure the caller is allowed to use objects from a PFMEMALLOC slab */
+	if (unlikely(is_obj_pfmemalloc(objp))) {
+		struct kmem_list3 *l3;
+
+		if (gfp_pfmemalloc_allowed(flags)) {
+			clear_obj_pfmemalloc(&objp);
+			return objp;
+		}
+
+		/* The caller cannot use PFMEMALLOC objects, find another one */
+		for (i = 1; i < ac->avail; i++) {
+			/* If a !PFMEMALLOC object is found, swap them */
+			if (!is_obj_pfmemalloc(ac->entry[i])) {
+				objp = ac->entry[i];
+				ac->entry[i] = ac->entry[ac->avail];
+				ac->entry[ac->avail] = objp;
+				return objp;
+			}
+		}
+
+		/*
+		 * If there are empty slabs on the slabs_free list and we are
+		 * being forced to refill the cache, mark this one !pfmemalloc.
+		 */
+		l3 = cachep->nodelists[numa_mem_id()];
+		if (!list_empty(&l3->slabs_free) && force_refill) {
+			struct slab *slabp = virt_to_slab(objp);
+			ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+			clear_obj_pfmemalloc(&objp);
+			recheck_pfmemalloc_active(cachep, ac);
+			return objp;
+		}
+
+		/* No !PFMEMALLOC objects available */
+		ac->avail++;
+		objp = NULL;
+	}
+
+	return objp;
+}
+
+static void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+								void *objp)
+{
+	if (unlikely(pfmemalloc_active)) {
+		/* Some pfmemalloc slabs exist, check if this is one */
+		struct page *page = virt_to_page(objp);
+		if (PageSlabPfmemalloc(page))
+			set_obj_pfmemalloc(&objp);
+	}
+
+	ac->entry[ac->avail++] = objp;
+}
+
 /*
  * Transfer objects in one arraycache to another.
  * Locking must be handled by the caller.
@@ -1076,7 +1201,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 			STATS_INC_ACOVERFLOW(cachep);
 			__drain_alien_cache(cachep, alien, nodeid);
 		}
-		alien->entry[alien->avail++] = objp;
+		ac_put_obj(cachep, alien, objp);
 		spin_unlock(&alien->lock);
 	} else {
 		spin_lock(&(cachep->nodelists[nodeid])->list_lock);
@@ -1759,6 +1884,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		return NULL;
 	}
 
+	/* Record if ALLOC_PFMEMALLOC was set when allocating the slab */
+	if (unlikely(page->pfmemalloc))
+		pfmemalloc_active = true;
+
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -1766,9 +1895,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	else
 		add_zone_page_state(page_zone(page),
 			NR_SLAB_UNRECLAIMABLE, nr_pages);
-	for (i = 0; i < nr_pages; i++)
+	for (i = 0; i < nr_pages; i++) {
 		__SetPageSlab(page + i);
 
+		if (page->pfmemalloc)
+			SetPageSlabPfmemalloc(page + i);
+	}
+
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
 
@@ -1800,6 +1933,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 					NR_SLAB_UNRECLAIMABLE, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
+		__ClearPageSlabPfmemalloc(page);
 		__ClearPageSlab(page);
 		page++;
 	}
@@ -3015,16 +3149,19 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
+							bool force_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
 	struct array_cache *ac;
 	int node;
 
-retry:
 	check_irq_off();
 	node = numa_mem_id();
+	if (unlikely(force_refill))
+		goto force_grow;
+retry:
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3074,8 +3211,8 @@ retry:
 			STATS_INC_ACTIVE(cachep);
 			STATS_SET_HIGH(cachep);
 
-			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
-							    node);
+			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+									node));
 		}
 		check_slabp(cachep, slabp);
 
@@ -3094,18 +3231,22 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || force_refill))
 			return NULL;
 
 		if (!ac->avail)		/* objects refilled by interrupt? */
 			goto retry;
 	}
 	ac->touched = 1;
-	return ac->entry[--ac->avail];
+
+	return ac_get_obj(cachep, ac, flags, force_refill);
 }
 
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
@@ -3187,23 +3328,35 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
 	struct array_cache *ac;
+	bool force_refill = false;
 
 	check_irq_off();
 
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
-		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
-		objp = ac->entry[--ac->avail];
-	} else {
-		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = ac_get_obj(cachep, ac, flags, false);
+
 		/*
-		 * the 'ac' may be updated by cache_alloc_refill(),
-		 * and kmemleak_erase() requires its correct value.
+		 * Allow for the possibility all avail objects are not allowed
+		 * by the current flags
 		 */
-		ac = cpu_cache_get(cachep);
+		if (objp) {
+			STATS_INC_ALLOCHIT(cachep);
+			goto out;
+		}
+		force_refill = true;
 	}
+
+	STATS_INC_ALLOCMISS(cachep);
+	objp = cache_alloc_refill(cachep, flags, force_refill);
+	/*
+	 * the 'ac' may be updated by cache_alloc_refill(),
+	 * and kmemleak_erase() requires its correct value.
+	 */
+	ac = cpu_cache_get(cachep);
+
+out:
 	/*
 	 * To avoid a false negative, if an object that is in one of the
 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
@@ -3525,9 +3678,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 	struct kmem_list3 *l3;
 
 	for (i = 0; i < nr_objects; i++) {
-		void *objp = objpp[i];
+		void *objp;
 		struct slab *slabp;
 
+		clear_obj_pfmemalloc(&objpp[i]);
+		objp = objpp[i];
+
 		slabp = virt_to_slab(objp);
 		l3 = cachep->nodelists[node];
 		list_del(&slabp->list);
@@ -3645,7 +3801,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
 		cache_flusharray(cachep, ac);
 	}
 
-	ac->entry[ac->avail++] = objp;
+	ac_put_obj(cachep, ac, objp);
 }
 
 /**
diff --git a/mm/slub.c b/mm/slub.c
index e517d435e5d..c3f05e1599c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -34,6 +34,8 @@
 
 #include <trace/events/kmem.h>
 
+#include "internal.h"
+
 /*
  * Lock order:
  *   1. slab_mutex (Global Mutex)
@@ -1354,6 +1356,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	inc_slabs_node(s, page_to_nid(page), page->objects);
 	page->slab = s;
 	__SetPageSlab(page);
+	if (page->pfmemalloc)
+		SetPageSlabPfmemalloc(page);
 
 	start = page_address(page);
 
@@ -1397,6 +1401,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
 
+	__ClearPageSlabPfmemalloc(page);
 	__ClearPageSlab(page);
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
@@ -2126,6 +2131,14 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
 	return freelist;
 }
 
+static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
+{
+	if (unlikely(PageSlabPfmemalloc(page)))
+		return gfp_pfmemalloc_allowed(gfpflags);
+
+	return true;
+}
+
 /*
  * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
  * or deactivate the page.
@@ -2206,6 +2219,18 @@ redo:
 		goto new_slab;
 	}
 
+	/*
+	 * By rights, we should be searching for a slab page that was
+	 * PFMEMALLOC but right now, we are losing the pfmemalloc
+	 * information when the page leaves the per-cpu allocator
+	 */
+	if (unlikely(!pfmemalloc_match(page, gfpflags))) {
+		deactivate_slab(s, page, c->freelist);
+		c->page = NULL;
+		c->freelist = NULL;
+		goto new_slab;
+	}
+
 	/* must check again c->freelist in case of cpu migration or IRQ */
 	freelist = c->freelist;
 	if (freelist)
@@ -2312,8 +2337,8 @@ redo:
 
 	object = c->freelist;
 	page = c->page;
-	if (unlikely(!object || !node_match(page, node)))
-
+	if (unlikely(!object || !node_match(page, node) ||
+					!pfmemalloc_match(page, gfpflags)))
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {