-rw-r--r--   include/linux/mm_types.h   |   9
-rw-r--r--   include/linux/page-flags.h |  29
-rw-r--r--   mm/internal.h              |   3
-rw-r--r--   mm/page_alloc.c            |  27
-rw-r--r--   mm/slab.c                  | 192
-rw-r--r--   mm/slub.c                  |  29
6 files changed, 264 insertions, 25 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 074eb98fe15..375e79eb009 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -54,6 +54,15 @@ struct page {
         union {
                 pgoff_t index;          /* Our offset within mapping. */
                 void *freelist;         /* slub/slob first free object */
+                bool pfmemalloc;        /* If set by the page allocator,
+                                         * ALLOC_PFMEMALLOC was set
+                                         * and the low watermark was not
+                                         * met implying that the system
+                                         * is under some pressure. The
+                                         * caller should try to ensure
+                                         * this page is only used to
+                                         * free other pages.
+                                         */
         };
 
         union {
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c88d2a9451a..b5d13841604 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -7,6 +7,7 @@
 
 #include <linux/types.h>
 #include <linux/bug.h>
+#include <linux/mmdebug.h>
 #ifndef __GENERATING_BOUNDS_H
 #include <linux/mm_types.h>
 #include <generated/bounds.h>
@@ -453,6 +454,34 @@ static inline int PageTransTail(struct page *page)
 }
 #endif
 
+/*
+ * If network-based swap is enabled, sl*b must keep track of whether pages
+ * were allocated from pfmemalloc reserves.
+ */
+static inline int PageSlabPfmemalloc(struct page *page)
+{
+        VM_BUG_ON(!PageSlab(page));
+        return PageActive(page);
+}
+
+static inline void SetPageSlabPfmemalloc(struct page *page)
+{
+        VM_BUG_ON(!PageSlab(page));
+        SetPageActive(page);
+}
+
+static inline void __ClearPageSlabPfmemalloc(struct page *page)
+{
+        VM_BUG_ON(!PageSlab(page));
+        __ClearPageActive(page);
+}
+
+static inline void ClearPageSlabPfmemalloc(struct page *page)
+{
+        VM_BUG_ON(!PageSlab(page));
+        ClearPageActive(page);
+}
+
 #ifdef CONFIG_MMU
 #define __PG_MLOCKED            (1 << PG_mlocked)
 #else
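For readers skimming the hunk above: the new SlabPfmemalloc state does not consume a fresh page flag bit; it aliases PG_active, which slab pages never use, and the VM_BUG_ON(!PageSlab(page)) checks guard against the alias being read on a non-slab page. Below is a minimal user-space sketch of the same flag-aliasing idea (illustrative only; the fake_page struct, the bit values and the assert() standing in for VM_BUG_ON() are placeholders, not the kernel's definitions):

/*
 * Illustrative user-space sketch (not kernel code): reuse one flag bit
 * under the invariant that its normal owner never appears together with
 * the new user. assert() plays the role of VM_BUG_ON().
 */
#include <assert.h>
#include <stdio.h>

#define PG_slab    (1UL << 0)   /* stand-ins for the real page flag bits */
#define PG_active  (1UL << 1)

struct fake_page { unsigned long flags; };

static int page_slab_pfmemalloc(struct fake_page *p)
{
        assert(p->flags & PG_slab);          /* only valid on slab pages */
        return !!(p->flags & PG_active);     /* PG_active doubles as pfmemalloc */
}

static void set_page_slab_pfmemalloc(struct fake_page *p)
{
        assert(p->flags & PG_slab);
        p->flags |= PG_active;
}

int main(void)
{
        struct fake_page page = { .flags = PG_slab };

        set_page_slab_pfmemalloc(&page);
        printf("pfmemalloc? %d\n", page_slab_pfmemalloc(&page)); /* prints 1 */
        return 0;
}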
diff --git a/mm/internal.h b/mm/internal.h
index 3314f79d775..eb76b67890d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -279,6 +279,9 @@ static inline struct page *mem_map_next(struct page *iter,
 #define __paginginit __init
 #endif
 
+/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
+bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
+
 /* Memory initialisation debug and verification */
 enum mminit_level {
         MMINIT_WARNING,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6a29ed8e6e6..38e5be65f24 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1513,6 +1513,7 @@ failed:
 #define ALLOC_HARDER            0x10 /* try to alloc harder */
 #define ALLOC_HIGH              0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET            0x40 /* check for correct cpuset */
+#define ALLOC_PFMEMALLOC        0x80 /* Caller has PF_MEMALLOC set */
 
 #ifdef CONFIG_FAIL_PAGE_ALLOC
 
@@ -2293,16 +2294,22 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
         } else if (unlikely(rt_task(current)) && !in_interrupt())
                 alloc_flags |= ALLOC_HARDER;
 
-        if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
-                if (!in_interrupt() &&
-                    ((current->flags & PF_MEMALLOC) ||
-                     unlikely(test_thread_flag(TIF_MEMDIE))))
+        if ((current->flags & PF_MEMALLOC) ||
+                        unlikely(test_thread_flag(TIF_MEMDIE))) {
+                alloc_flags |= ALLOC_PFMEMALLOC;
+
+                if (likely(!(gfp_mask & __GFP_NOMEMALLOC)) && !in_interrupt())
                         alloc_flags |= ALLOC_NO_WATERMARKS;
         }
 
         return alloc_flags;
 }
 
+bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
+{
+        return !!(gfp_to_alloc_flags(gfp_mask) & ALLOC_PFMEMALLOC);
+}
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         struct zonelist *zonelist, enum zone_type high_zoneidx,
@@ -2490,10 +2497,18 @@ nopage:
         warn_alloc_failed(gfp_mask, order, NULL);
         return page;
 got_pg:
+        /*
+         * page->pfmemalloc is set when the caller had PFMEMALLOC set or has
+         * been OOM killed. The expectation is that the caller is taking
+         * steps that will free more memory. The caller should avoid the
+         * page being used for !PFMEMALLOC purposes.
+         */
+        page->pfmemalloc = !!(alloc_flags & ALLOC_PFMEMALLOC);
+
         if (kmemcheck_enabled)
                 kmemcheck_pagealloc_alloc(page, order, gfp_mask);
-        return page;
 
+        return page;
 }
 
 /*
@@ -2544,6 +2559,8 @@ retry_cpuset:
                 page = __alloc_pages_slowpath(gfp_mask, order,
                                 zonelist, high_zoneidx, nodemask,
                                 preferred_zone, migratetype);
+        else
+                page->pfmemalloc = false;
 
         trace_mm_page_alloc(page, order, gfp_mask, migratetype);
 
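The page allocator hunks above carry the policy core of the patch: a task that is already reclaiming (PF_MEMALLOC) or being OOM killed (TIF_MEMDIE) is granted ALLOC_PFMEMALLOC, and may additionally ignore watermarks unless __GFP_NOMEMALLOC was passed or it is in interrupt context; gfp_pfmemalloc_allowed() then lets sl*b ask whether a given allocation context would be entitled to those reserves. A hedged user-space sketch of that derivation follows (placeholder flag values and a toy task struct, not the kernel API, and the other ALLOC_* bookkeeping is omitted):

/*
 * Sketch of the flag derivation, under the assumptions named above.
 */
#include <stdbool.h>
#include <stdio.h>

#define __GFP_NOMEMALLOC        0x01    /* placeholder values */
#define ALLOC_NO_WATERMARKS     0x04
#define ALLOC_PFMEMALLOC        0x80

struct task { bool pf_memalloc; bool tif_memdie; };

static unsigned int gfp_to_alloc_flags(const struct task *t,
                                       unsigned int gfp_mask, bool in_interrupt)
{
        unsigned int alloc_flags = 0;

        if (t->pf_memalloc || t->tif_memdie) {
                alloc_flags |= ALLOC_PFMEMALLOC;

                /* only dip below watermarks if the caller did not forbid it */
                if (!(gfp_mask & __GFP_NOMEMALLOC) && !in_interrupt)
                        alloc_flags |= ALLOC_NO_WATERMARKS;
        }
        return alloc_flags;
}

static bool gfp_pfmemalloc_allowed(const struct task *t, unsigned int gfp_mask)
{
        return gfp_to_alloc_flags(t, gfp_mask, false) & ALLOC_PFMEMALLOC;
}

int main(void)
{
        struct task reclaimer = { .pf_memalloc = true };
        struct task normal = { 0 };

        printf("reclaimer allowed: %d\n", gfp_pfmemalloc_allowed(&reclaimer, 0)); /* 1 */
        printf("normal allowed:    %d\n", gfp_pfmemalloc_allowed(&normal, 0));    /* 0 */
        return 0;
}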
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -124,6 +124,8 @@
 
 #include <trace/events/kmem.h>
 
+#include "internal.h"
+
 /*
  * DEBUG        - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *                0 for faster, smaller code (especially in the critical paths).
@@ -152,6 +154,12 @@
 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
 #endif
 
+/*
+ * true if a page was allocated from pfmemalloc reserves for network-based
+ * swap
+ */
+static bool pfmemalloc_active __read_mostly;
+
 /* Legal flag mask for kmem_cache_create(). */
 #if DEBUG
 # define CREATE_MASK    (SLAB_RED_ZONE | \
@@ -257,9 +265,30 @@ struct array_cache {
                          * Must have this definition in here for the proper
                          * alignment of array_cache. Also simplifies accessing
                          * the entries.
+                         *
+                         * Entries should not be directly dereferenced as
+                         * entries belonging to slabs marked pfmemalloc will
+                         * have the lower bit set to SLAB_OBJ_PFMEMALLOC
                          */
 };
 
+#define SLAB_OBJ_PFMEMALLOC     1
+static inline bool is_obj_pfmemalloc(void *objp)
+{
+        return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
+}
+
+static inline void set_obj_pfmemalloc(void **objp)
+{
+        *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
+        return;
+}
+
+static inline void clear_obj_pfmemalloc(void **objp)
+{
+        *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
+}
+
 /*
  * bootstrap: The caches do not work without cpuarrays anymore, but the
  * cpuarrays are allocated from the generic caches...
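The helpers above rely on slab objects being at least word aligned, so bit 0 of an object pointer is always zero and can carry the "object came from a pfmemalloc slab" marker while the pointer sits in a per-cpu array_cache. A stand-alone sketch of that pointer-tagging trick (plain C with malloc() standing in for a slab object; not kernel code):

/*
 * Pointer tagging in the low bit of an aligned pointer. The tag must be
 * cleared before the pointer is dereferenced.
 */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define SLAB_OBJ_PFMEMALLOC     1UL

static int is_obj_pfmemalloc(void *objp)
{
        return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
}

static void set_obj_pfmemalloc(void **objp)
{
        *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
}

static void clear_obj_pfmemalloc(void **objp)
{
        *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
}

int main(void)
{
        void *obj = malloc(64);         /* malloc returns aligned memory, bit 0 clear */

        assert(!is_obj_pfmemalloc(obj));
        set_obj_pfmemalloc(&obj);
        assert(is_obj_pfmemalloc(obj));
        clear_obj_pfmemalloc(&obj);     /* untag before use/free */
        free(obj);
        printf("tag round-trip ok\n");
        return 0;
}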
@@ -900,6 +929,102 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         return nc;
 }
 
+static inline bool is_slab_pfmemalloc(struct slab *slabp)
+{
+        struct page *page = virt_to_page(slabp->s_mem);
+
+        return PageSlabPfmemalloc(page);
+}
+
+/* Clears pfmemalloc_active if no slabs have pfmemalloc set */
+static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
+                                                struct array_cache *ac)
+{
+        struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()];
+        struct slab *slabp;
+        unsigned long flags;
+
+        if (!pfmemalloc_active)
+                return;
+
+        spin_lock_irqsave(&l3->list_lock, flags);
+        list_for_each_entry(slabp, &l3->slabs_full, list)
+                if (is_slab_pfmemalloc(slabp))
+                        goto out;
+
+        list_for_each_entry(slabp, &l3->slabs_partial, list)
+                if (is_slab_pfmemalloc(slabp))
+                        goto out;
+
+        list_for_each_entry(slabp, &l3->slabs_free, list)
+                if (is_slab_pfmemalloc(slabp))
+                        goto out;
+
+        pfmemalloc_active = false;
+out:
+        spin_unlock_irqrestore(&l3->list_lock, flags);
+}
+
+static void *ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
+                                                gfp_t flags, bool force_refill)
+{
+        int i;
+        void *objp = ac->entry[--ac->avail];
+
+        /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
+        if (unlikely(is_obj_pfmemalloc(objp))) {
+                struct kmem_list3 *l3;
+
+                if (gfp_pfmemalloc_allowed(flags)) {
+                        clear_obj_pfmemalloc(&objp);
+                        return objp;
+                }
+
+                /* The caller cannot use PFMEMALLOC objects, find another one */
+                for (i = 1; i < ac->avail; i++) {
+                        /* If a !PFMEMALLOC object is found, swap them */
+                        if (!is_obj_pfmemalloc(ac->entry[i])) {
+                                objp = ac->entry[i];
+                                ac->entry[i] = ac->entry[ac->avail];
+                                ac->entry[ac->avail] = objp;
+                                return objp;
+                        }
+                }
+
+                /*
+                 * If there are empty slabs on the slabs_free list and we are
+                 * being forced to refill the cache, mark this one !pfmemalloc.
+                 */
+                l3 = cachep->nodelists[numa_mem_id()];
+                if (!list_empty(&l3->slabs_free) && force_refill) {
+                        struct slab *slabp = virt_to_slab(objp);
+                        ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+                        clear_obj_pfmemalloc(&objp);
+                        recheck_pfmemalloc_active(cachep, ac);
+                        return objp;
+                }
+
+                /* No !PFMEMALLOC objects available */
+                ac->avail++;
+                objp = NULL;
+        }
+
+        return objp;
+}
+
+static void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+                                                                void *objp)
+{
+        if (unlikely(pfmemalloc_active)) {
+                /* Some pfmemalloc slabs exist, check if this is one */
+                struct page *page = virt_to_page(objp);
+                if (PageSlabPfmemalloc(page))
+                        set_obj_pfmemalloc(&objp);
+        }
+
+        ac->entry[ac->avail++] = objp;
+}
+
 /*
  * Transfer objects in one arraycache to another.
  * Locking must be handled by the caller.
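ac_get_obj() and ac_put_obj() above are where the tag is applied and honoured: objects freed into the per-cpu array are tagged if their slab page is marked pfmemalloc, and a caller that is not entitled to the reserves either takes an untagged object instead or gets NULL and forces a refill. A rough user-space model of the get-side policy (a plain array stands in for struct array_cache, bit 0 is the tag as in the previous sketch, and the swap heuristic is simplified):

/*
 * Toy model only: real slab code also handles force_refill and the
 * slabs_free rescue path shown in the hunk above.
 */
#include <stdbool.h>
#include <stdio.h>

#define TAG 1UL

struct cache { void *entry[8]; int avail; };

static void *cache_get(struct cache *c, bool pfmemalloc_allowed)
{
        void *objp = c->entry[--c->avail];
        int i;

        if (!((unsigned long)objp & TAG))
                return objp;
        if (pfmemalloc_allowed)
                return (void *)((unsigned long)objp & ~TAG);

        /* caller may not use reserve-backed objects: look for a clean one */
        for (i = 0; i < c->avail; i++) {
                if (!((unsigned long)c->entry[i] & TAG)) {
                        void *clean = c->entry[i];
                        c->entry[i] = objp;     /* leave the tagged one behind */
                        return clean;
                }
        }
        c->avail++;                     /* put the tagged object back */
        return NULL;
}

int main(void)
{
        static long a, b;
        struct cache c = { .avail = 2 };

        c.entry[0] = &a;                                /* ordinary object */
        c.entry[1] = (void *)((unsigned long)&b | TAG); /* from a pfmemalloc slab */

        printf("normal caller gets untagged obj:  %d\n", cache_get(&c, false) == &a);
        printf("entitled caller gets reserve obj: %d\n", cache_get(&c, true) == &b);
        return 0;
}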
@@ -1076,7 +1201,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
                         STATS_INC_ACOVERFLOW(cachep);
                         __drain_alien_cache(cachep, alien, nodeid);
                 }
-                alien->entry[alien->avail++] = objp;
+                ac_put_obj(cachep, alien, objp);
                 spin_unlock(&alien->lock);
         } else {
                 spin_lock(&(cachep->nodelists[nodeid])->list_lock);
@@ -1759,6 +1884,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                 return NULL;
         }
 
+        /* Record if ALLOC_PFMEMALLOC was set when allocating the slab */
+        if (unlikely(page->pfmemalloc))
+                pfmemalloc_active = true;
+
         nr_pages = (1 << cachep->gfporder);
         if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
                 add_zone_page_state(page_zone(page),
@@ -1766,9 +1895,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
         else
                 add_zone_page_state(page_zone(page),
                         NR_SLAB_UNRECLAIMABLE, nr_pages);
-        for (i = 0; i < nr_pages; i++)
+        for (i = 0; i < nr_pages; i++) {
                 __SetPageSlab(page + i);
 
+                if (page->pfmemalloc)
+                        SetPageSlabPfmemalloc(page + i);
+        }
+
         if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
                 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
 
@@ -1800,6 +1933,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
                 NR_SLAB_UNRECLAIMABLE, nr_freed);
         while (i--) {
                 BUG_ON(!PageSlab(page));
+                __ClearPageSlabPfmemalloc(page);
                 __ClearPageSlab(page);
                 page++;
         }
@@ -3015,16 +3149,19 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
+                                                        bool force_refill)
 {
         int batchcount;
         struct kmem_list3 *l3;
         struct array_cache *ac;
         int node;
 
-retry:
         check_irq_off();
         node = numa_mem_id();
+        if (unlikely(force_refill))
+                goto force_grow;
+retry:
         ac = cpu_cache_get(cachep);
         batchcount = ac->batchcount;
         if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3074,8 +3211,8 @@ retry:
                         STATS_INC_ACTIVE(cachep);
                         STATS_SET_HIGH(cachep);
 
-                        ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
-                                                            node);
+                        ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+                                                                        node));
                 }
                 check_slabp(cachep, slabp);
 
@@ -3094,18 +3231,22 @@ alloc_done:
 
         if (unlikely(!ac->avail)) {
                 int x;
+force_grow:
                 x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
                 /* cache_grow can reenable interrupts, then ac could change. */
                 ac = cpu_cache_get(cachep);
-                if (!x && ac->avail == 0)       /* no objects in sight? abort */
+
+                /* no objects in sight? abort */
+                if (!x && (ac->avail == 0 || force_refill))
                         return NULL;
 
                 if (!ac->avail)         /* objects refilled by interrupt? */
                         goto retry;
         }
         ac->touched = 1;
-        return ac->entry[--ac->avail];
+
+        return ac_get_obj(cachep, ac, flags, force_refill);
 }
 
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
@@ -3187,23 +3328,35 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
         void *objp;
         struct array_cache *ac;
+        bool force_refill = false;
 
         check_irq_off();
 
         ac = cpu_cache_get(cachep);
         if (likely(ac->avail)) {
-                STATS_INC_ALLOCHIT(cachep);
                 ac->touched = 1;
-                objp = ac->entry[--ac->avail];
-        } else {
-                STATS_INC_ALLOCMISS(cachep);
-                objp = cache_alloc_refill(cachep, flags);
+                objp = ac_get_obj(cachep, ac, flags, false);
+
                 /*
-                 * the 'ac' may be updated by cache_alloc_refill(),
-                 * and kmemleak_erase() requires its correct value.
+                 * Allow for the possibility all avail objects are not allowed
+                 * by the current flags
                  */
-                ac = cpu_cache_get(cachep);
+                if (objp) {
+                        STATS_INC_ALLOCHIT(cachep);
+                        goto out;
+                }
+                force_refill = true;
         }
+
+        STATS_INC_ALLOCMISS(cachep);
+        objp = cache_alloc_refill(cachep, flags, force_refill);
+        /*
+         * the 'ac' may be updated by cache_alloc_refill(),
+         * and kmemleak_erase() requires its correct value.
+         */
+        ac = cpu_cache_get(cachep);
+
+out:
         /*
          * To avoid a false negative, if an object that is in one of the
          * per-CPU caches is leaked, we need to make sure kmemleak doesn't
@@ -3525,9 +3678,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
         struct kmem_list3 *l3;
 
         for (i = 0; i < nr_objects; i++) {
-                void *objp = objpp[i];
+                void *objp;
                 struct slab *slabp;
 
+                clear_obj_pfmemalloc(&objpp[i]);
+                objp = objpp[i];
+
                 slabp = virt_to_slab(objp);
                 l3 = cachep->nodelists[node];
                 list_del(&slabp->list);
@@ -3645,7 +3801,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
                 cache_flusharray(cachep, ac);
         }
 
-        ac->entry[ac->avail++] = objp;
+        ac_put_obj(cachep, ac, objp);
 }
 
 /**
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -34,6 +34,8 @@
 
 #include <trace/events/kmem.h>
 
+#include "internal.h"
+
 /*
  * Lock order:
  *   1. slab_mutex (Global Mutex)
@@ -1354,6 +1356,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
         inc_slabs_node(s, page_to_nid(page), page->objects);
         page->slab = s;
         __SetPageSlab(page);
+        if (page->pfmemalloc)
+                SetPageSlabPfmemalloc(page);
 
         start = page_address(page);
 
@@ -1397,6 +1401,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
                 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
                 -pages);
 
+        __ClearPageSlabPfmemalloc(page);
         __ClearPageSlab(page);
         reset_page_mapcount(page);
         if (current->reclaim_state)
@@ -2126,6 +2131,14 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
         return freelist;
 }
 
+static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
+{
+        if (unlikely(PageSlabPfmemalloc(page)))
+                return gfp_pfmemalloc_allowed(gfpflags);
+
+        return true;
+}
+
 /*
  * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
  * or deactivate the page.
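Unlike slab's per-object tagging, slub decides per page: pfmemalloc_match() above allows an object to be served from the current per-cpu slab only if the slab is not reserve-backed or the caller is itself entitled to the reserves; otherwise the fast path falls through to __slab_alloc(). A minimal sketch of that decision (placeholder fake_page type and a boolean standing in for gfp_pfmemalloc_allowed(), not the kernel's struct page):

/*
 * Minimal decision sketch under the assumptions named above.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_page { bool slab_pfmemalloc; };     /* stands in for PageSlabPfmemalloc() */

static bool pfmemalloc_match(const struct fake_page *page, bool gfp_pfmemalloc_allowed)
{
        if (page->slab_pfmemalloc)
                return gfp_pfmemalloc_allowed;
        return true;    /* ordinary slabs match any caller */
}

int main(void)
{
        struct fake_page reserve_slab = { .slab_pfmemalloc = true };
        struct fake_page normal_slab  = { .slab_pfmemalloc = false };

        /* a normal caller must fall back to __slab_alloc() for reserve slabs */
        printf("%d %d\n", pfmemalloc_match(&normal_slab, false),       /* 1 */
                          pfmemalloc_match(&reserve_slab, false));     /* 0 */
        return 0;
}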
@@ -2206,6 +2219,18 @@ redo:
                 goto new_slab;
         }
 
+        /*
+         * By rights, we should be searching for a slab page that was
+         * PFMEMALLOC but right now, we are losing the pfmemalloc
+         * information when the page leaves the per-cpu allocator
+         */
+        if (unlikely(!pfmemalloc_match(page, gfpflags))) {
+                deactivate_slab(s, page, c->freelist);
+                c->page = NULL;
+                c->freelist = NULL;
+                goto new_slab;
+        }
+
         /* must check again c->freelist in case of cpu migration or IRQ */
         freelist = c->freelist;
         if (freelist)
@@ -2312,8 +2337,8 @@ redo:
 
         object = c->freelist;
         page = c->page;
-        if (unlikely(!object || !node_match(page, node)))
-
+        if (unlikely(!object || !node_match(page, node) ||
+                                        !pfmemalloc_match(page, gfpflags)))
                 object = __slab_alloc(s, gfpflags, node, addr, c);
 
         else {