Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 108
1 file changed, 96 insertions, 12 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 298b449a03c7..251b8a0c9c5d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2375,20 +2375,30 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
  * condition simpler.
  */
 static __always_inline bool
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
+						unsigned int alloc_flags)
 {
 	struct free_area *area;
 	int current_order;
+	int min_order = order;
 	struct page *page;
 	int fallback_mt;
 	bool can_steal;
 
 	/*
+	 * Do not steal pages from freelists belonging to other pageblocks
+	 * i.e. orders < pageblock_order. If there are no local zones free,
+	 * the zonelists will be reiterated without ALLOC_NOFRAGMENT.
+	 */
+	if (alloc_flags & ALLOC_NOFRAGMENT)
+		min_order = pageblock_order;
+
+	/*
 	 * Find the largest available free page in the other list. This roughly
 	 * approximates finding the pageblock with the most free pages, which
 	 * would be too costly to do exactly.
 	 */
-	for (current_order = MAX_ORDER - 1; current_order >= order;
+	for (current_order = MAX_ORDER - 1; current_order >= min_order;
 							--current_order) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
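A minimal userspace sketch of the gating added above, for illustration only. MAX_ORDER, PAGEBLOCK_ORDER, ALLOC_NOFRAGMENT and the free_area array are stand-ins rather than the kernel definitions; only the raising of min_order when the hint is set is modeled.

#include <stdio.h>

/* Toy stand-ins for kernel constants (values are illustrative only). */
#define MAX_ORDER        11
#define PAGEBLOCK_ORDER  9
#define ALLOC_NOFRAGMENT 0x1u

/* Toy "free area": nonzero means that order has a block we could steal. */
static const int free_area[MAX_ORDER] = { [3] = 1 };

/*
 * Model of the fallback search: scan from the largest order down, but if
 * ALLOC_NOFRAGMENT is set, refuse to steal anything smaller than a whole
 * pageblock (orders below PAGEBLOCK_ORDER).
 */
static int find_fallback_order(int order, unsigned int alloc_flags)
{
	int min_order = order;
	int current_order;

	if (alloc_flags & ALLOC_NOFRAGMENT)
		min_order = PAGEBLOCK_ORDER;

	for (current_order = MAX_ORDER - 1; current_order >= min_order;
	     --current_order) {
		if (free_area[current_order])
			return current_order;
	}
	return -1;	/* nothing suitable; caller retries without the hint */
}

int main(void)
{
	printf("order 3, no hint:          %d\n", find_fallback_order(3, 0));
	printf("order 3, ALLOC_NOFRAGMENT: %d\n",
	       find_fallback_order(3, ALLOC_NOFRAGMENT));
	return 0;
}

With only an order-3 block free, the unhinted search steals it, while the hinted search reports failure so the caller can retry elsewhere before fragmenting a pageblock.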
@@ -2447,7 +2457,8 @@ do_steal:
  * Call me with the zone->lock already held.
  */
 static __always_inline struct page *
-__rmqueue(struct zone *zone, unsigned int order, int migratetype)
+__rmqueue(struct zone *zone, unsigned int order, int migratetype,
+						unsigned int alloc_flags)
 {
 	struct page *page;
 
@@ -2457,7 +2468,8 @@ retry:
 		if (migratetype == MIGRATE_MOVABLE)
 			page = __rmqueue_cma_fallback(zone, order);
 
-		if (!page && __rmqueue_fallback(zone, order, migratetype))
+		if (!page && __rmqueue_fallback(zone, order, migratetype,
+								alloc_flags))
 			goto retry;
 	}
 
@@ -2472,13 +2484,14 @@ retry:
  */
 static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list,
-			int migratetype)
+			int migratetype, unsigned int alloc_flags)
 {
 	int i, alloced = 0;
 
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		struct page *page = __rmqueue(zone, order, migratetype);
+		struct page *page = __rmqueue(zone, order, migratetype,
+								alloc_flags);
 		if (unlikely(page == NULL))
 			break;
 
@@ -2934,6 +2947,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 
 /* Remove page from the per-cpu list, caller must protect the list */
 static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+			unsigned int alloc_flags,
 			struct per_cpu_pages *pcp,
 			struct list_head *list)
 {
@@ -2943,7 +2957,7 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 	if (list_empty(list)) {
 		pcp->count += rmqueue_bulk(zone, 0,
 				pcp->batch, list,
-				migratetype);
+				migratetype, alloc_flags);
 		if (unlikely(list_empty(list)))
 			return NULL;
 	}
@@ -2959,7 +2973,8 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 /* Lock and remove page from the per-cpu list */
 static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 			struct zone *zone, unsigned int order,
-			gfp_t gfp_flags, int migratetype)
+			gfp_t gfp_flags, int migratetype,
+			unsigned int alloc_flags)
 {
 	struct per_cpu_pages *pcp;
 	struct list_head *list;
@@ -2969,7 +2984,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	local_irq_save(flags);
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
 	list = &pcp->lists[migratetype];
-	page = __rmqueue_pcplist(zone, migratetype, pcp, list);
+	page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
 	if (page) {
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
 		zone_statistics(preferred_zone, zone);
@@ -2992,7 +3007,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 
 	if (likely(order == 0)) {
 		page = rmqueue_pcplist(preferred_zone, zone, order,
-				gfp_flags, migratetype);
+				gfp_flags, migratetype, alloc_flags);
 		goto out;
 	}
 
@@ -3011,7 +3026,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 			trace_mm_page_alloc_zone_locked(page, order, migratetype);
 		}
 		if (!page)
-			page = __rmqueue(zone, order, migratetype);
+			page = __rmqueue(zone, order, migratetype, alloc_flags);
 	} while (page && check_new_pages(page, order));
 	spin_unlock(&zone->lock);
 	if (!page)
@@ -3253,6 +3268,40 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 }
 #endif	/* CONFIG_NUMA */
 
+#ifdef CONFIG_ZONE_DMA32
+/*
+ * The restriction on ZONE_DMA32 as being a suitable zone to use to avoid
+ * fragmentation is subtle. If the preferred zone was HIGHMEM then
+ * premature use of a lower zone may cause lowmem pressure problems that
+ * are worse than fragmentation. If the next zone is ZONE_DMA then it is
+ * probably too small. It only makes sense to spread allocations to avoid
+ * fragmentation between the Normal and DMA32 zones.
+ */
+static inline unsigned int
+alloc_flags_nofragment(struct zone *zone)
+{
+	if (zone_idx(zone) != ZONE_NORMAL)
+		return 0;
+
+	/*
+	 * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and
+	 * the pointer is within zone->zone_pgdat->node_zones[]. Also assume
+	 * on UMA that if Normal is populated then so is DMA32.
+	 */
+	BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
+	if (nr_online_nodes > 1 && !populated_zone(--zone))
+		return 0;
+
+	return ALLOC_NOFRAGMENT;
+}
+#else
+static inline unsigned int
+alloc_flags_nofragment(struct zone *zone)
+{
+	return 0;
+}
+#endif
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
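A toy model of the decision in alloc_flags_nofragment() above, using zone indexes instead of struct zone pointers. The enum values, the populated[] array and nr_online_nodes here are made up for illustration; the real code decrements a struct zone pointer and relies on the BUILD_BUG_ON adjacency check shown in the hunk.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative zone indexes; only the DMA32/Normal adjacency matters. */
enum zone_idx { ZONE_DMA = 0, ZONE_DMA32 = 1, ZONE_NORMAL = 2, NR_ZONES };

#define ALLOC_NOFRAGMENT 0x1u

/* Toy per-node state: which zones have memory, and how many nodes exist. */
static const bool populated[NR_ZONES] = {
	[ZONE_DMA] = true, [ZONE_DMA32] = true, [ZONE_NORMAL] = true,
};
static const int nr_online_nodes = 1;

/*
 * Mirror of the decision: only hand out the hint when the preferred zone
 * is Normal, and (on NUMA) only when the zone one slot below it, assumed
 * to be DMA32, actually has memory to spill into.
 */
static unsigned int nofragment_hint(enum zone_idx preferred)
{
	if (preferred != ZONE_NORMAL)
		return 0;

	if (nr_online_nodes > 1 && !populated[preferred - 1])
		return 0;

	return ALLOC_NOFRAGMENT;
}

int main(void)
{
	printf("preferred DMA32:  %#x\n", nofragment_hint(ZONE_DMA32));
	printf("preferred Normal: %#x\n", nofragment_hint(ZONE_NORMAL));
	return 0;
}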
@@ -3261,14 +3310,18 @@ static struct page *
 get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 						const struct alloc_context *ac)
 {
-	struct zoneref *z = ac->preferred_zoneref;
+	struct zoneref *z;
 	struct zone *zone;
 	struct pglist_data *last_pgdat_dirty_limit = NULL;
+	bool no_fallback;
 
+retry:
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
 	 * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
 	 */
+	no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
+	z = ac->preferred_zoneref;
 	for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
 								ac->nodemask) {
 		struct page *page;
@@ -3307,6 +3360,22 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
 			}
 		}
 
+		if (no_fallback && nr_online_nodes > 1 &&
+		    zone != ac->preferred_zoneref->zone) {
+			int local_nid;
+
+			/*
+			 * If moving to a remote node, retry but allow
+			 * fragmenting fallbacks. Locality is more important
+			 * than fragmentation avoidance.
+			 */
+			local_nid = zone_to_nid(ac->preferred_zoneref->zone);
+			if (zone_to_nid(zone) != local_nid) {
+				alloc_flags &= ~ALLOC_NOFRAGMENT;
+				goto retry;
+			}
+		}
+
 		mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
 		if (!zone_watermark_fast(zone, order, mark,
 				       ac_classzone_idx(ac), alloc_flags)) {
@@ -3374,6 +3443,15 @@ try_this_zone:
 		}
 	}
 
+	/*
+	 * It's possible on a UMA machine to get through all zones that are
+	 * fragmented. If avoiding fragmentation, reset and try again.
+	 */
+	if (no_fallback) {
+		alloc_flags &= ~ALLOC_NOFRAGMENT;
+		goto retry;
+	}
+
 	return NULL;
 }
 
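The two hunks above together give get_page_from_freelist() a two-pass shape: a first pass restricted by ALLOC_NOFRAGMENT, then a retry with the hint cleared once the walk would leave the local node or once every local zone has been tried. A self-contained sketch of that retry structure, with a toy zonelist in place of the kernel iterators (struct toy_zone and its fields are invented for this example):

#include <stdbool.h>
#include <stdio.h>

#define ALLOC_NOFRAGMENT 0x1u

/* Toy zonelist entry: which node a zone lives on and what it can satisfy. */
struct toy_zone {
	int node;
	bool has_unfragmented;	/* page available without a fragmenting fallback */
	bool has_any;		/* page available if fallbacks are allowed */
};

/*
 * Model of the retry structure: walk the zonelist with the hint set; once
 * the walk reaches a remote node, or a full pass finds nothing, clear
 * ALLOC_NOFRAGMENT and start over from the preferred zone.
 */
static int scan_zonelist(const struct toy_zone *zl, int n, int local_node,
			 unsigned int alloc_flags)
{
	bool no_fallback;
	int i;

retry:
	no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
	for (i = 0; i < n; i++) {
		if (no_fallback && zl[i].node != local_node) {
			/* Locality beats fragmentation avoidance. */
			alloc_flags &= ~ALLOC_NOFRAGMENT;
			goto retry;
		}
		if (no_fallback ? zl[i].has_unfragmented : zl[i].has_any)
			return i;	/* "allocated" from zone i */
	}
	if (no_fallback) {
		/* All local zones were fragmented; allow fallbacks. */
		alloc_flags &= ~ALLOC_NOFRAGMENT;
		goto retry;
	}
	return -1;
}

int main(void)
{
	/* Local node 0 is fragmented; remote node 1 has clean blocks. */
	const struct toy_zone zl[] = {
		{ .node = 0, .has_unfragmented = false, .has_any = true },
		{ .node = 1, .has_unfragmented = true,  .has_any = true },
	};

	printf("chosen zone: %d\n",
	       scan_zonelist(zl, 2, 0, ALLOC_NOFRAGMENT));
	return 0;
}

The run picks zone 0: the scan prefers a fragmenting fallback on the local node over a pristine pageblock on the remote one, which is the trade-off the in-tree comment describes.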
@@ -4369,6 +4447,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 
 	finalise_ac(gfp_mask, &ac);
 
+	/*
+	 * Forbid the first pass from falling back to types that fragment
+	 * memory until all local zones are considered.
+	 */
+	alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone);
+
 	/* First allocation attempt */
 	page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
 	if (likely(page))
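Finally, a compact sketch of how the hint is wired into the top of the allocation path by the last hunk. The function names echo the kernel's but the bodies here are stubs invented for illustration; the point is only the order of operations: compute the hint from the preferred zone, OR it into alloc_flags, then do the first freelist pass.

#include <stdio.h>

#define ALLOC_WMARK_LOW  0x2u
#define ALLOC_NOFRAGMENT 0x1u

/* Stub standing in for alloc_flags_nofragment(). */
static unsigned int alloc_flags_nofragment_stub(int preferred_zone_is_normal)
{
	return preferred_zone_is_normal ? ALLOC_NOFRAGMENT : 0;
}

/* Stub standing in for get_page_from_freelist(). */
static void *get_page_from_freelist_stub(unsigned int alloc_flags)
{
	printf("first pass alloc_flags = %#x\n", alloc_flags);
	return NULL;	/* pretend the fast path failed */
}

/*
 * Shape of the change to __alloc_pages_nodemask(): the fragmentation-
 * avoidance hint is ORed in after the allocation context is finalised,
 * so only the first freelist pass is restricted by it.
 */
static void *alloc_pages_sketch(int preferred_zone_is_normal)
{
	unsigned int alloc_flags = ALLOC_WMARK_LOW;

	alloc_flags |= alloc_flags_nofragment_stub(preferred_zone_is_normal);

	return get_page_from_freelist_stub(alloc_flags);
}

int main(void)
{
	alloc_pages_sketch(1);
	return 0;
}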