Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 108
1 file changed, 96 insertions(+), 12 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 298b449a03c7..251b8a0c9c5d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2375,20 +2375,30 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
  * condition simpler.
  */
 static __always_inline bool
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, int order, int start_migratetype,
+                                               unsigned int alloc_flags)
 {
         struct free_area *area;
         int current_order;
+        int min_order = order;
         struct page *page;
         int fallback_mt;
         bool can_steal;
 
         /*
+         * Do not steal pages from freelists belonging to other pageblocks
+         * i.e. orders < pageblock_order. If there are no local zones free,
+         * the zonelists will be reiterated without ALLOC_NOFRAGMENT.
+         */
+        if (alloc_flags & ALLOC_NOFRAGMENT)
+                min_order = pageblock_order;
+
+        /*
          * Find the largest available free page in the other list. This roughly
          * approximates finding the pageblock with the most free pages, which
          * would be too costly to do exactly.
          */
-        for (current_order = MAX_ORDER - 1; current_order >= order;
+        for (current_order = MAX_ORDER - 1; current_order >= min_order;
                                 --current_order) {
                 area = &(zone->free_area[current_order]);
                 fallback_mt = find_suitable_fallback(area, current_order,
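
The hunk above gates fallback stealing on ALLOC_NOFRAGMENT: when the flag is set, __rmqueue_fallback() only considers free areas of at least pageblock_order, so a whole pageblock changes migratetype rather than splintering a smaller chunk out of someone else's pageblock. The following standalone sketch, which is not part of the patch, illustrates that gating; the constants and the ALLOC_NOFRAGMENT value are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>

#define MAX_ORDER        11      /* illustrative; a common x86-64 value */
#define PAGEBLOCK_ORDER   9      /* illustrative stand-in for pageblock_order */
#define ALLOC_NOFRAGMENT  0x1u   /* illustrative flag bit, not the kernel value */

/* Lowest order the fallback search will visit, mirroring min_order above. */
static int fallback_min_order(int order, unsigned int alloc_flags)
{
        return (alloc_flags & ALLOC_NOFRAGMENT) ? PAGEBLOCK_ORDER : order;
}

int main(void)
{
        /* An order-0 request normally searches from MAX_ORDER - 1 down to 0. */
        printf("default:    orders %d..%d\n",
               MAX_ORDER - 1, fallback_min_order(0, 0));
        /* With ALLOC_NOFRAGMENT it stops at whole pageblocks. */
        printf("nofragment: orders %d..%d\n",
               MAX_ORDER - 1, fallback_min_order(0, ALLOC_NOFRAGMENT));
        return 0;
}
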
@@ -2447,7 +2457,8 @@ do_steal:
  * Call me with the zone->lock already held.
  */
 static __always_inline struct page *
-__rmqueue(struct zone *zone, unsigned int order, int migratetype)
+__rmqueue(struct zone *zone, unsigned int order, int migratetype,
+                                               unsigned int alloc_flags)
 {
         struct page *page;
 
@@ -2457,7 +2468,8 @@ retry:
                 if (migratetype == MIGRATE_MOVABLE)
                         page = __rmqueue_cma_fallback(zone, order);
 
-                if (!page && __rmqueue_fallback(zone, order, migratetype))
+                if (!page && __rmqueue_fallback(zone, order, migratetype,
+                                                               alloc_flags))
                         goto retry;
         }
 
@@ -2472,13 +2484,14 @@ retry:
  */
 static int rmqueue_bulk(struct zone *zone, unsigned int order,
                         unsigned long count, struct list_head *list,
-                        int migratetype)
+                        int migratetype, unsigned int alloc_flags)
 {
         int i, alloced = 0;
 
         spin_lock(&zone->lock);
         for (i = 0; i < count; ++i) {
-                struct page *page = __rmqueue(zone, order, migratetype);
+                struct page *page = __rmqueue(zone, order, migratetype,
+                                                               alloc_flags);
                 if (unlikely(page == NULL))
                         break;
 
@@ -2934,6 +2947,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
 
 /* Remove page from the per-cpu list, caller must protect the list */
 static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+                        unsigned int alloc_flags,
                         struct per_cpu_pages *pcp,
                         struct list_head *list)
 {
@@ -2943,7 +2957,7 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
                 if (list_empty(list)) {
                         pcp->count += rmqueue_bulk(zone, 0,
                                         pcp->batch, list,
-                                        migratetype);
+                                        migratetype, alloc_flags);
                         if (unlikely(list_empty(list)))
                                 return NULL;
                 }
@@ -2959,7 +2973,8 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
 /* Lock and remove page from the per-cpu list */
 static struct page *rmqueue_pcplist(struct zone *preferred_zone,
                         struct zone *zone, unsigned int order,
-                        gfp_t gfp_flags, int migratetype)
+                        gfp_t gfp_flags, int migratetype,
+                        unsigned int alloc_flags)
 {
         struct per_cpu_pages *pcp;
         struct list_head *list;
@@ -2969,7 +2984,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
         local_irq_save(flags);
         pcp = &this_cpu_ptr(zone->pageset)->pcp;
         list = &pcp->lists[migratetype];
-        page = __rmqueue_pcplist(zone, migratetype, pcp, list);
+        page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
         if (page) {
                 __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
                 zone_statistics(preferred_zone, zone);
@@ -2992,7 +3007,7 @@ struct page *rmqueue(struct zone *preferred_zone,
 
         if (likely(order == 0)) {
                 page = rmqueue_pcplist(preferred_zone, zone, order,
-                                gfp_flags, migratetype);
+                                gfp_flags, migratetype, alloc_flags);
                 goto out;
         }
 
@@ -3011,7 +3026,7 @@ struct page *rmqueue(struct zone *preferred_zone,
                         trace_mm_page_alloc_zone_locked(page, order, migratetype);
                 }
                 if (!page)
-                        page = __rmqueue(zone, order, migratetype);
+                        page = __rmqueue(zone, order, migratetype, alloc_flags);
         } while (page && check_new_pages(page, order));
         spin_unlock(&zone->lock);
         if (!page)
@@ -3253,6 +3268,40 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 }
 #endif  /* CONFIG_NUMA */
 
+#ifdef CONFIG_ZONE_DMA32
+/*
+ * The restriction on ZONE_DMA32 as being a suitable zone to use to avoid
+ * fragmentation is subtle. If the preferred zone was HIGHMEM then
+ * premature use of a lower zone may cause lowmem pressure problems that
+ * are worse than fragmentation. If the next zone is ZONE_DMA then it is
+ * probably too small. It only makes sense to spread allocations to avoid
+ * fragmentation between the Normal and DMA32 zones.
+ */
+static inline unsigned int
+alloc_flags_nofragment(struct zone *zone)
+{
+        if (zone_idx(zone) != ZONE_NORMAL)
+                return 0;
+
+        /*
+         * If ZONE_DMA32 exists, assume it is the one after ZONE_NORMAL and
+         * the pointer is within zone->zone_pgdat->node_zones[]. Also assume
+         * on UMA that if Normal is populated then so is DMA32.
+         */
+        BUILD_BUG_ON(ZONE_NORMAL - ZONE_DMA32 != 1);
+        if (nr_online_nodes > 1 && !populated_zone(--zone))
+                return 0;
+
+        return ALLOC_NOFRAGMENT;
+}
+#else
+static inline unsigned int
+alloc_flags_nofragment(struct zone *zone)
+{
+        return 0;
+}
+#endif
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
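
The new alloc_flags_nofragment() helper decides, per preferred zone, whether the no-fragment pass is worth attempting at all: only for ZONE_NORMAL, and on NUMA only when a populated ZONE_DMA32 sits directly below it. A simplified standalone model of that decision is sketched below, not part of the patch; the zone_model struct, the enum values and the flag bit are illustrative stand-ins for the real zone/pgdat state, not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

#define ALLOC_NOFRAGMENT 0x1u   /* illustrative flag bit, not the kernel value */

enum zone_type { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, ZONE_MOVABLE };

/* Simplified stand-in for the zone/pgdat state the real helper inspects. */
struct zone_model {
        enum zone_type idx;
        bool dma32_populated;   /* stands in for populated_zone(zone - 1) */
};

static unsigned int nofragment_flags(const struct zone_model *zone,
                                     int nr_online_nodes)
{
        /* Only spread Normal allocations into DMA32; other zones get nothing. */
        if (zone->idx != ZONE_NORMAL)
                return 0;
        /* On NUMA, require a populated DMA32 below Normal on the same node. */
        if (nr_online_nodes > 1 && !zone->dma32_populated)
                return 0;
        return ALLOC_NOFRAGMENT;
}

int main(void)
{
        struct zone_model normal  = { ZONE_NORMAL, true };
        struct zone_model movable = { ZONE_MOVABLE, true };

        printf("Normal preferred:  %#x\n", nofragment_flags(&normal, 2));
        printf("Movable preferred: %#x\n", nofragment_flags(&movable, 2));
        return 0;
}
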
@@ -3261,14 +3310,18 @@ static struct page *
 get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                                                 const struct alloc_context *ac)
 {
-        struct zoneref *z = ac->preferred_zoneref;
+        struct zoneref *z;
         struct zone *zone;
         struct pglist_data *last_pgdat_dirty_limit = NULL;
+        bool no_fallback;
 
+retry:
         /*
          * Scan zonelist, looking for a zone with enough free.
          * See also __cpuset_node_allowed() comment in kernel/cpuset.c.
          */
+        no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
+        z = ac->preferred_zoneref;
         for_next_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
                                         ac->nodemask) {
                 struct page *page;
@@ -3307,6 +3360,22 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags,
                         }
                 }
 
+                if (no_fallback && nr_online_nodes > 1 &&
+                    zone != ac->preferred_zoneref->zone) {
+                        int local_nid;
+
+                        /*
+                         * If moving to a remote node, retry but allow
+                         * fragmenting fallbacks. Locality is more important
+                         * than fragmentation avoidance.
+                         */
+                        local_nid = zone_to_nid(ac->preferred_zoneref->zone);
+                        if (zone_to_nid(zone) != local_nid) {
+                                alloc_flags &= ~ALLOC_NOFRAGMENT;
+                                goto retry;
+                        }
+                }
+
                 mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
                 if (!zone_watermark_fast(zone, order, mark,
                                        ac_classzone_idx(ac), alloc_flags)) {
@@ -3374,6 +3443,15 @@ try_this_zone:
                 }
         }
 
+        /*
+         * It's possible on a UMA machine to get through all zones that are
+         * fragmented. If avoiding fragmentation, reset and try again.
+         */
+        if (no_fallback) {
+                alloc_flags &= ~ALLOC_NOFRAGMENT;
+                goto retry;
+        }
+
         return NULL;
 }
 
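
Together with the remote-node check added earlier, this gives get_page_from_freelist() a two-pass structure: a first scan of the zonelist constrained by ALLOC_NOFRAGMENT, then, if every local zone is too fragmented, a rescan with the flag cleared. The standalone sketch below shows just that control flow and is not part of the patch; try_zone(), scan_zonelist() and the flag value are hypothetical stand-ins for the real per-zone attempt.

#include <stdbool.h>
#include <stdio.h>

#define ALLOC_NOFRAGMENT 0x1u   /* illustrative flag bit, not the kernel value */

/*
 * Hypothetical per-zone attempt: here every zone is assumed too fragmented
 * to satisfy a request while ALLOC_NOFRAGMENT is set, so only the second,
 * unconstrained pass can succeed.
 */
static bool try_zone(int zone, unsigned int alloc_flags)
{
        (void)zone;
        return !(alloc_flags & ALLOC_NOFRAGMENT);
}

static int scan_zonelist(const int *zones, int nr_zones, unsigned int alloc_flags)
{
        bool no_fallback;
        int i;

retry:
        no_fallback = alloc_flags & ALLOC_NOFRAGMENT;
        for (i = 0; i < nr_zones; i++) {
                if (try_zone(zones[i], alloc_flags))
                        return zones[i];
        }

        /* The constrained pass found nothing: drop the flag and rescan once. */
        if (no_fallback) {
                alloc_flags &= ~ALLOC_NOFRAGMENT;
                goto retry;
        }
        return -1;
}

int main(void)
{
        int zones[] = { 2, 1 };         /* e.g. Normal, then DMA32 */

        printf("allocated from zone %d\n",
               scan_zonelist(zones, 2, ALLOC_NOFRAGMENT));
        return 0;
}
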
@@ -4369,6 +4447,12 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 
         finalise_ac(gfp_mask, &ac);
 
+        /*
+         * Forbid the first pass from falling back to types that fragment
+         * memory until all local zones are considered.
+         */
+        alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone);
+
         /* First allocation attempt */
         page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
         if (likely(page))