-rw-r--r--  mm/internal.h   |  1
-rw-r--r--  mm/page_alloc.c | 89
2 files changed, 46 insertions(+), 44 deletions(-)
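
As the hunks below read, the patch replaces the implicit fairness test (ALLOC_WMARK_LOW plus a GFP_THISNODE exemption) with an explicit ALLOC_FAIR flag, splits prepare_slowpath() into reset_alloc_batches() and wake_all_kswapds(), and turns allocation into two passes: a fair pass restricted to local zones with a positive NR_ALLOC_BATCH, then a retry over the whole zonelist with the batches reset and fairness dropped, before the slowpath wakes kswapd. The following is a minimal userspace sketch of that flow, not kernel code: struct toy_zone, the fixed batch refill, and the simplified get_page_from_freelist()/reset_alloc_batches() signatures are illustrative stand-ins.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define ALLOC_FAIR	0x100

struct toy_zone {
	const char *name;
	bool local;		/* zone on the preferred node? */
	long alloc_batch;	/* stand-in for NR_ALLOC_BATCH */
	long free_pages;
};

/* Fair pass: local zones with batch left; unfair pass: any zone. */
static struct toy_zone *get_page_from_freelist(struct toy_zone *zones,
					       size_t nr, int alloc_flags)
{
	for (size_t i = 0; i < nr; i++) {
		struct toy_zone *z = &zones[i];

		if (alloc_flags & ALLOC_FAIR) {
			if (!z->local)
				continue;	/* fair pass is local-only */
			if (z->alloc_batch <= 0)
				continue;	/* fairness batch exhausted */
		}
		if (z->free_pages > 0) {
			z->free_pages--;
			z->alloc_batch--;
			return z;
		}
	}
	return NULL;
}

static void reset_alloc_batches(struct toy_zone *zones, size_t nr)
{
	for (size_t i = 0; i < nr; i++)
		if (zones[i].local)
			zones[i].alloc_batch = 4;	/* arbitrary refill */
}

int main(void)
{
	struct toy_zone zones[] = {
		{ "local",  true,  0, 8 },	/* batch spent, pages free */
		{ "remote", false, 4, 8 },
	};
	int alloc_flags = ALLOC_FAIR;
	struct toy_zone *z;

	z = get_page_from_freelist(zones, 2, alloc_flags);
	if (!z) {
		/* Fair pass failed: reset batches, retry without fairness. */
		reset_alloc_batches(zones, 2);
		alloc_flags &= ~ALLOC_FAIR;
		z = get_page_from_freelist(zones, 2, alloc_flags);
	}
	printf("allocated from %s zone\n", z ? z->name : "no");
	return 0;
}

In the sketch the local zone's batch is spent but it still has free pages, so the second pass satisfies the allocation locally instead of entering the slowpath; a genuinely full local zone would spill to the remote zone the same way, before any kswapd wakeup.
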
diff --git a/mm/internal.h b/mm/internal.h
index 29e1e761f9eb..3e910000fda4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -370,5 +370,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET		0x40 /* check for correct cpuset */
 #define ALLOC_CMA		0x80 /* allow allocations from CMA areas */
+#define ALLOC_FAIR		0x100 /* fair zone allocation */
 
 #endif	/* __MM_INTERNAL_H */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 73c25912c7c4..15d140755e71 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1239,15 +1239,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	}
 	local_irq_restore(flags);
 }
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-	return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
-}
-#else
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-	return false;
-}
 #endif
 
 /*
@@ -1584,12 +1575,7 @@ again:
 					  get_pageblock_migratetype(page));
 	}
 
-	/*
-	 * NOTE: GFP_THISNODE allocations do not partake in the kswapd
-	 * aging protocol, so they can't be fair.
-	 */
-	if (!gfp_thisnode_allocation(gfp_flags))
-		__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1955,23 +1941,12 @@ zonelist_scan:
 		 * zone size to ensure fair page aging. The zone a
 		 * page was allocated in should have no effect on the
 		 * time the page has in memory before being reclaimed.
-		 *
-		 * Try to stay in local zones in the fastpath. If
-		 * that fails, the slowpath is entered, which will do
-		 * another pass starting with the local zones, but
-		 * ultimately fall back to remote zones that do not
-		 * partake in the fairness round-robin cycle of this
-		 * zonelist.
-		 *
-		 * NOTE: GFP_THISNODE allocations do not partake in
-		 * the kswapd aging protocol, so they can't be fair.
 		 */
-		if ((alloc_flags & ALLOC_WMARK_LOW) &&
-		    !gfp_thisnode_allocation(gfp_mask)) {
-			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
-				continue;
+		if (alloc_flags & ALLOC_FAIR) {
 			if (!zone_local(preferred_zone, zone))
 				continue;
+			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+				continue;
 		}
 		/*
 		 * When allocating a page cache page for writing, we
@@ -2409,32 +2384,40 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 	return page;
 }
 
-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
-			     struct zonelist *zonelist,
-			     enum zone_type high_zoneidx,
-			     struct zone *preferred_zone)
+static void reset_alloc_batches(struct zonelist *zonelist,
+				enum zone_type high_zoneidx,
+				struct zone *preferred_zone)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-		if (!(gfp_mask & __GFP_NO_KSWAPD))
-			wakeup_kswapd(zone, order, zone_idx(preferred_zone));
 		/*
 		 * Only reset the batches of zones that were actually
-		 * considered in the fast path, we don't want to
-		 * thrash fairness information for zones that are not
+		 * considered in the fairness pass, we don't want to
+		 * trash fairness information for zones that are not
 		 * actually part of this zonelist's round-robin cycle.
 		 */
 		if (!zone_local(preferred_zone, zone))
 			continue;
 		mod_zone_page_state(zone, NR_ALLOC_BATCH,
-				    high_wmark_pages(zone) -
-				    low_wmark_pages(zone) -
-				    zone_page_state(zone, NR_ALLOC_BATCH));
+				    high_wmark_pages(zone) - low_wmark_pages(zone) -
+				    atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
 	}
 }
 
+static void wake_all_kswapds(unsigned int order,
+			     struct zonelist *zonelist,
+			     enum zone_type high_zoneidx,
+			     struct zone *preferred_zone)
+{
+	struct zoneref *z;
+	struct zone *zone;
+
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+		wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+}
+
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
@@ -2523,12 +2506,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * allowed per node queues are empty and that nodes are
 	 * over allocated.
 	 */
-	if (gfp_thisnode_allocation(gfp_mask))
+	if (IS_ENABLED(CONFIG_NUMA) &&
+	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
 restart:
-	prepare_slowpath(gfp_mask, order, zonelist,
-			 high_zoneidx, preferred_zone);
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2712,7 +2696,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
-	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
 	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
@@ -2753,12 +2737,29 @@ retry_cpuset:
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 #endif
+retry:
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
 	if (unlikely(!page)) {
 		/*
+		 * The first pass makes sure allocations are spread
+		 * fairly within the local node. However, the local
+		 * node might have free pages left after the fairness
+		 * batches are exhausted, and remote zones haven't
+		 * even been considered yet. Try once more without
+		 * fairness, and include remote zones now, before
+		 * entering the slowpath and waking kswapd: prefer
+		 * spilling to a remote zone over swapping locally.
+		 */
+		if (alloc_flags & ALLOC_FAIR) {
+			reset_alloc_batches(zonelist, high_zoneidx,
+					    preferred_zone);
+			alloc_flags &= ~ALLOC_FAIR;
+			goto retry;
+		}
+		/*
 		 * Runtime PM, block IO and its error handling path
 		 * can deadlock because I/O on the device might not
 		 * complete.