Diffstat (limited to 'mm/slab.c')

 mm/slab.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 174 insertions(+), 18 deletions(-)

--- a/mm/slab.c
+++ b/mm/slab.c
@@ -124,6 +124,8 @@
 
 #include <trace/events/kmem.h>
 
+#include "internal.h"
+
 /*
  * DEBUG	- 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
  *		  0 for faster, smaller code (especially in the critical paths).
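The new "internal.h" include is what gives slab.c access to gfp_pfmemalloc_allowed(), which the allocation hunks further down use to decide whether a caller may consume objects backed by pfmemalloc reserves. For orientation, the mm/internal.h helper is essentially a one-line mask test (a sketch of the helper from the same series, not part of this diff):

    /* A caller may dip into pfmemalloc reserves unless it explicitly
     * opted out of them with __GFP_NOMEMALLOC. */
    static inline bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
    {
            return !(gfp_mask & __GFP_NOMEMALLOC);
    }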
@@ -152,6 +154,12 @@
 #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
 #endif
 
+/*
+ * true if a page was allocated from pfmemalloc reserves for network-based
+ * swap
+ */
+static bool pfmemalloc_active __read_mostly;
+
 /* Legal flag mask for kmem_cache_create(). */
 #if DEBUG
 # define CREATE_MASK	(SLAB_RED_ZONE | \
@@ -257,9 +265,30 @@ struct array_cache {
 	 * Must have this definition in here for the proper
 	 * alignment of array_cache. Also simplifies accessing
 	 * the entries.
+	 *
+	 * Entries should not be directly dereferenced as
+	 * entries belonging to slabs marked pfmemalloc will
+	 * have the lower bits set SLAB_OBJ_PFMEMALLOC
 	 */
 };
 
+#define SLAB_OBJ_PFMEMALLOC	1
+static inline bool is_obj_pfmemalloc(void *objp)
+{
+	return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
+}
+
+static inline void set_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
+	return;
+}
+
+static inline void clear_obj_pfmemalloc(void **objp)
+{
+	*objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
+}
+
 /*
  * bootstrap: The caches do not work without cpuarrays anymore, but the
  * cpuarrays are allocated from the generic caches...
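These helpers tag the pointer itself rather than keep a separate flag per entry: slab objects are always at least word-aligned, so bit 0 of an object address is guaranteed clear and can carry SLAB_OBJ_PFMEMALLOC while the pointer sits in an array_cache. A minimal userspace illustration of the same trick (hypothetical demo code, not from the patch):

    #include <assert.h>
    #include <stdlib.h>

    #define OBJ_PFMEMALLOC 1UL              /* stand-in for SLAB_OBJ_PFMEMALLOC */

    int main(void)
    {
            void *obj = malloc(64);         /* malloc results are word-aligned: bit 0 is clear */
            assert(((unsigned long)obj & OBJ_PFMEMALLOC) == 0);

            /* set_obj_pfmemalloc() */
            obj = (void *)((unsigned long)obj | OBJ_PFMEMALLOC);
            /* is_obj_pfmemalloc() */
            assert((unsigned long)obj & OBJ_PFMEMALLOC);
            /* clear_obj_pfmemalloc(): must happen before the pointer is dereferenced */
            obj = (void *)((unsigned long)obj & ~OBJ_PFMEMALLOC);

            free(obj);
            return 0;
    }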
@@ -900,6 +929,102 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 	return nc;
 }
 
+static inline bool is_slab_pfmemalloc(struct slab *slabp)
+{
+	struct page *page = virt_to_page(slabp->s_mem);
+
+	return PageSlabPfmemalloc(page);
+}
+
+/* Clears pfmemalloc_active if no slabs have pfmemalloc set */
+static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
+					struct array_cache *ac)
+{
+	struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()];
+	struct slab *slabp;
+	unsigned long flags;
+
+	if (!pfmemalloc_active)
+		return;
+
+	spin_lock_irqsave(&l3->list_lock, flags);
+	list_for_each_entry(slabp, &l3->slabs_full, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_partial, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	list_for_each_entry(slabp, &l3->slabs_free, list)
+		if (is_slab_pfmemalloc(slabp))
+			goto out;
+
+	pfmemalloc_active = false;
+out:
+	spin_unlock_irqrestore(&l3->list_lock, flags);
+}
+
+static void *ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
+			gfp_t flags, bool force_refill)
+{
+	int i;
+	void *objp = ac->entry[--ac->avail];
+
+	/* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
+	if (unlikely(is_obj_pfmemalloc(objp))) {
+		struct kmem_list3 *l3;
+
+		if (gfp_pfmemalloc_allowed(flags)) {
+			clear_obj_pfmemalloc(&objp);
+			return objp;
+		}
+
+		/* The caller cannot use PFMEMALLOC objects, find another one */
+		for (i = 1; i < ac->avail; i++) {
+			/* If a !PFMEMALLOC object is found, swap them */
+			if (!is_obj_pfmemalloc(ac->entry[i])) {
+				objp = ac->entry[i];
+				ac->entry[i] = ac->entry[ac->avail];
+				ac->entry[ac->avail] = objp;
+				return objp;
+			}
+		}
+
+		/*
+		 * If there are empty slabs on the slabs_free list and we are
+		 * being forced to refill the cache, mark this one !pfmemalloc.
+		 */
+		l3 = cachep->nodelists[numa_mem_id()];
+		if (!list_empty(&l3->slabs_free) && force_refill) {
+			struct slab *slabp = virt_to_slab(objp);
+			ClearPageSlabPfmemalloc(virt_to_page(slabp->s_mem));
+			clear_obj_pfmemalloc(&objp);
+			recheck_pfmemalloc_active(cachep, ac);
+			return objp;
+		}
+
+		/* No !PFMEMALLOC objects available */
+		ac->avail++;
+		objp = NULL;
+	}
+
+	return objp;
+}
+
+static void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+			void *objp)
+{
+	if (unlikely(pfmemalloc_active)) {
+		/* Some pfmemalloc slabs exist, check if this is one */
+		struct page *page = virt_to_page(objp);
+		if (PageSlabPfmemalloc(page))
+			set_obj_pfmemalloc(&objp);
+	}
+
+	ac->entry[ac->avail++] = objp;
+}
+
 /*
  * Transfer objects in one arraycache to another.
  * Locking must be handled by the caller.
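ac_get_obj() keeps the common case cheap: it pops the top of the per-CPU LIFO array, and only pays for a linear scan when that entry is tagged and the caller may not use reserves. In that case the first untagged entry found is swapped into the popped slot, so the reserve object stays cached for a later caller that is entitled to it. A userspace sketch of that scan (a toy with the same shape as the kernel loop, indices simplified):

    #include <stdbool.h>
    #include <stddef.h>

    #define TAG 1UL

    static bool is_tagged(void *p) { return (unsigned long)p & TAG; }

    /* Toy ac_get_obj(): entry[0 .. *avail-1] is the live array. */
    static void *get_obj(void **entry, int *avail, bool reserves_allowed)
    {
            void *objp = entry[--(*avail)];

            if (!is_tagged(objp))
                    return objp;
            if (reserves_allowed)
                    return (void *)((unsigned long)objp & ~TAG);

            /* Trade the tagged object for an untagged one further down. */
            for (int i = 0; i < *avail; i++) {
                    if (!is_tagged(entry[i])) {
                            void *plain = entry[i];
                            entry[i] = entry[*avail];   /* tagged object stays cached */
                            entry[*avail] = plain;
                            return plain;
                    }
            }

            (*avail)++;     /* nothing the caller may use: undo the pop */
            return NULL;
    }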
@@ -1076,7 +1201,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 			STATS_INC_ACOVERFLOW(cachep);
 			__drain_alien_cache(cachep, alien, nodeid);
 		}
-		alien->entry[alien->avail++] = objp;
+		ac_put_obj(cachep, alien, objp);
 		spin_unlock(&alien->lock);
 	} else {
 		spin_lock(&(cachep->nodelists[nodeid])->list_lock);
@@ -1759,6 +1884,10 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		return NULL;
 	}
 
+	/* Record if ALLOC_PFMEMALLOC was set when allocating the slab */
+	if (unlikely(page->pfmemalloc))
+		pfmemalloc_active = true;
+
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		add_zone_page_state(page_zone(page),
@@ -1766,9 +1895,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	else
 		add_zone_page_state(page_zone(page),
 			NR_SLAB_UNRECLAIMABLE, nr_pages);
-	for (i = 0; i < nr_pages; i++)
+	for (i = 0; i < nr_pages; i++) {
 		__SetPageSlab(page + i);
 
+		if (page->pfmemalloc)
+			SetPageSlabPfmemalloc(page + i);
+	}
+
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
 
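PageSlabPfmemalloc(), SetPageSlabPfmemalloc() and __ClearPageSlabPfmemalloc() come from the page-flags side of the same series. Slab pages never appear on the LRU, so the marker can alias an existing LRU-only page flag rather than consume a scarce new bit; to the best of my reading it aliases PG_active, roughly as follows (sketch, assuming that alias, not quoted from this diff):

    static inline int PageSlabPfmemalloc(struct page *page)
    {
            VM_BUG_ON(!PageSlab(page));
            return PageActive(page);
    }

    static inline void SetPageSlabPfmemalloc(struct page *page)
    {
            VM_BUG_ON(!PageSlab(page));
            SetPageActive(page);
    }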
@@ -1800,6 +1933,7 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 			NR_SLAB_UNRECLAIMABLE, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
+		__ClearPageSlabPfmemalloc(page);
 		__ClearPageSlab(page);
 		page++;
 	}
@@ -3015,16 +3149,19 @@ bad:
 #define check_slabp(x,y) do { } while(0)
 #endif
 
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
+							bool force_refill)
 {
 	int batchcount;
 	struct kmem_list3 *l3;
 	struct array_cache *ac;
 	int node;
 
-retry:
 	check_irq_off();
 	node = numa_mem_id();
+	if (unlikely(force_refill))
+		goto force_grow;
+retry:
 	ac = cpu_cache_get(cachep);
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3074,8 +3211,8 @@ retry:
 			STATS_INC_ACTIVE(cachep);
 			STATS_SET_HIGH(cachep);
 
-			ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
-							    node);
+			ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp,
+									node));
 		}
 		check_slabp(cachep, slabp);
 
@@ -3094,18 +3231,22 @@ alloc_done:
 
 	if (unlikely(!ac->avail)) {
 		int x;
+force_grow:
 		x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
 
 		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	/* no objects in sight? abort */
+
+		/* no objects in sight? abort */
+		if (!x && (ac->avail == 0 || force_refill))
 			return NULL;
 
 		if (!ac->avail)		/* objects refilled by interrupt? */
 			goto retry;
 	}
 	ac->touched = 1;
-	return ac->entry[--ac->avail];
+
+	return ac_get_obj(cachep, ac, flags, force_refill);
 }
 
 static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
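The reworked entry and exit of cache_alloc_refill() encode a single rule: a forced refill must not fall back on objects already sitting in the array, because the caller has just been refused exactly those objects; it must grow the cache or fail. A toy model of the exit decision (userspace sketch, hypothetical names):

    #include <stdbool.h>
    #include <stdio.h>

    /* "grew" stands in for cache_grow() succeeding, "avail" for
     * ac->avail after the grow attempt. */
    static bool refill_succeeds(int avail, bool grew, bool force_refill)
    {
            if (!force_refill && avail > 0)
                    return true;    /* normal path: cached objects are usable */
            if (!grew && (avail == 0 || force_refill))
                    return false;   /* no objects in sight: abort */
            return true;            /* a fresh slab supplies a usable object */
    }

    int main(void)
    {
            printf("%d\n", refill_succeeds(4, false, false));  /* 1 */
            printf("%d\n", refill_succeeds(4, false, true));   /* 0: stale objects don't count */
            printf("%d\n", refill_succeeds(0, true, true));    /* 1 */
            return 0;
    }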
@@ -3187,23 +3328,35 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	void *objp;
 	struct array_cache *ac;
+	bool force_refill = false;
 
 	check_irq_off();
 
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
-		STATS_INC_ALLOCHIT(cachep);
 		ac->touched = 1;
-		objp = ac->entry[--ac->avail];
-	} else {
-		STATS_INC_ALLOCMISS(cachep);
-		objp = cache_alloc_refill(cachep, flags);
+		objp = ac_get_obj(cachep, ac, flags, false);
+
 		/*
-		 * the 'ac' may be updated by cache_alloc_refill(),
-		 * and kmemleak_erase() requires its correct value.
+		 * Allow for the possibility all avail objects are not allowed
+		 * by the current flags
 		 */
-		ac = cpu_cache_get(cachep);
+		if (objp) {
+			STATS_INC_ALLOCHIT(cachep);
+			goto out;
+		}
+		force_refill = true;
 	}
+
+	STATS_INC_ALLOCMISS(cachep);
+	objp = cache_alloc_refill(cachep, flags, force_refill);
+	/*
+	 * the 'ac' may be updated by cache_alloc_refill(),
+	 * and kmemleak_erase() requires its correct value.
+	 */
+	ac = cpu_cache_get(cachep);
+
+out:
 	/*
 	 * To avoid a false negative, if an object that is in one of the
 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
@@ -3525,9 +3678,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 	struct kmem_list3 *l3;
 
 	for (i = 0; i < nr_objects; i++) {
-		void *objp = objpp[i];
+		void *objp;
 		struct slab *slabp;
 
+		clear_obj_pfmemalloc(&objpp[i]);
+		objp = objpp[i];
+
 		slabp = virt_to_slab(objp);
 		l3 = cachep->nodelists[node];
 		list_del(&slabp->list);
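Note the ordering in free_block(): the tag is stripped from objpp[i] before the pointer is used for anything, virt_to_slab() included, since a tagged value is not the address the slab handed out. Stripping unconditionally is fine because the mask is a no-op on untagged pointers, as this userspace snippet illustrates (demo only, not patch code):

    #include <assert.h>
    #include <stdint.h>
    #include <stdlib.h>

    int main(void)
    {
            void *obj = malloc(32);         /* word-aligned, so bit 0 is clear */

            /* clear on a plain pointer: no-op */
            assert((void *)((uintptr_t)obj & ~1UL) == obj);
            /* clear on a tagged pointer: recovers the real address */
            assert((void *)((((uintptr_t)obj) | 1UL) & ~1UL) == obj);

            free(obj);
            return 0;
    }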
@@ -3645,7 +3801,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp,
 		cache_flusharray(cachep, ac);
 	}
 
-	ac->entry[ac->avail++] = objp;
+	ac_put_obj(cachep, ac, objp);
 }
 
 /**