-rw-r--r--   mm/internal.h   |  1
-rw-r--r--   mm/page_alloc.c | 89
2 files changed, 46 insertions, 44 deletions
diff --git a/mm/internal.h b/mm/internal.h
index 29e1e761f9eb..3e910000fda4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -370,5 +370,6 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
 #define ALLOC_HIGH              0x20 /* __GFP_HIGH set */
 #define ALLOC_CPUSET            0x40 /* check for correct cpuset */
 #define ALLOC_CMA               0x80 /* allow allocations from CMA areas */
+#define ALLOC_FAIR              0x100 /* fair zone allocation */
 
 #endif /* __MM_INTERNAL_H */
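A quick, standalone sanity check of the new flag (illustrative userspace C, not kernel code; only the flag values shown in the hunk above are taken from it): ALLOC_FAIR occupies its own bit, so it can be set for the fair first pass and stripped for the retry without disturbing the other allocation flags.

#include <assert.h>
#include <stdio.h>

/* Values copied from the mm/internal.h hunk above. */
#define ALLOC_HIGH   0x20   /* __GFP_HIGH set */
#define ALLOC_CPUSET 0x40   /* check for correct cpuset */
#define ALLOC_CMA    0x80   /* allow allocations from CMA areas */
#define ALLOC_FAIR   0x100  /* fair zone allocation */

int main(void)
{
        int alloc_flags = ALLOC_CPUSET | ALLOC_FAIR;

        /* ALLOC_FAIR does not overlap any of the existing flag bits. */
        assert((ALLOC_FAIR & (ALLOC_HIGH | ALLOC_CPUSET | ALLOC_CMA)) == 0);

        /* Stripping ALLOC_FAIR for the retry leaves the other flags intact. */
        alloc_flags &= ~ALLOC_FAIR;
        assert(alloc_flags == ALLOC_CPUSET);

        printf("ALLOC_FAIR = %#x\n", ALLOC_FAIR);
        return 0;
}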
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 73c25912c7c4..15d140755e71 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1239,15 +1239,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
         }
         local_irq_restore(flags);
 }
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-        return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
-}
-#else
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-        return false;
-}
 #endif
 
 /*
@@ -1584,12 +1575,7 @@ again:
                                           get_pageblock_migratetype(page));
         }
 
-        /*
-         * NOTE: GFP_THISNODE allocations do not partake in the kswapd
-         * aging protocol, so they can't be fair.
-         */
-        if (!gfp_thisnode_allocation(gfp_flags))
-                __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+        __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
 
         __count_zone_vm_events(PGALLOC, zone, 1 << order);
         zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1955,23 +1941,12 @@ zonelist_scan:
                  * zone size to ensure fair page aging. The zone a
                  * page was allocated in should have no effect on the
                  * time the page has in memory before being reclaimed.
-                 *
-                 * Try to stay in local zones in the fastpath. If
-                 * that fails, the slowpath is entered, which will do
-                 * another pass starting with the local zones, but
-                 * ultimately fall back to remote zones that do not
-                 * partake in the fairness round-robin cycle of this
-                 * zonelist.
-                 *
-                 * NOTE: GFP_THISNODE allocations do not partake in
-                 * the kswapd aging protocol, so they can't be fair.
                  */
-                if ((alloc_flags & ALLOC_WMARK_LOW) &&
-                    !gfp_thisnode_allocation(gfp_mask)) {
-                        if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
-                                continue;
+                if (alloc_flags & ALLOC_FAIR) {
                         if (!zone_local(preferred_zone, zone))
                                 continue;
+                        if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+                                continue;
                 }
                 /*
                  * When allocating a page cache page for writing, we
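The ALLOC_FAIR branch above is easier to see in a toy, userspace-only model of the policy (struct toy_zone, fair_pick() and all numbers below are invented for illustration; none of this is kernel API): while fairness is on, remote zones and zones whose allocation batch is exhausted are skipped, so allocations round-robin over the local zones and spill elsewhere once the local batches run out.

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for a zone; not the kernel's struct zone. */
struct toy_zone {
        const char *name;
        bool local;        /* belongs to the preferred node? */
        long alloc_batch;  /* models NR_ALLOC_BATCH */
        long free_pages;
};

/*
 * Models the fair pass in get_page_from_freelist(): with fairness on,
 * remote zones and batch-exhausted zones are skipped; with fairness
 * off, any zone with free pages will do.
 */
static struct toy_zone *fair_pick(struct toy_zone *zones, int nr, bool fair)
{
        for (int i = 0; i < nr; i++) {
                struct toy_zone *z = &zones[i];

                if (fair) {
                        if (!z->local)
                                continue;
                        if (z->alloc_batch <= 0)
                                continue;
                }
                if (z->free_pages > 0)
                        return z;
        }
        return NULL;
}

int main(void)
{
        struct toy_zone zones[] = {
                { "Normal (local)",  true,  2,   2 },
                { "DMA32 (local)",   true,  1,   1 },
                { "Normal (remote)", false, 0, 100 },
        };
        int nr = sizeof(zones) / sizeof(zones[0]);

        for (int i = 0; i < 6; i++) {
                /* First try with fairness, then fall back without it. */
                struct toy_zone *z = fair_pick(zones, nr, true);
                if (!z)
                        z = fair_pick(zones, nr, false);
                if (!z)
                        break;

                z->alloc_batch--;   /* models __mod_zone_page_state(..., -(1 << order)) */
                z->free_pages--;
                printf("allocation %d from %s\n", i, z->name);
        }
        return 0;
}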
@@ -2409,32 +2384,40 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
         return page;
 }
 
-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
-                             struct zonelist *zonelist,
-                             enum zone_type high_zoneidx,
-                             struct zone *preferred_zone)
+static void reset_alloc_batches(struct zonelist *zonelist,
+                                enum zone_type high_zoneidx,
+                                struct zone *preferred_zone)
 {
         struct zoneref *z;
         struct zone *zone;
 
         for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-                if (!(gfp_mask & __GFP_NO_KSWAPD))
-                        wakeup_kswapd(zone, order, zone_idx(preferred_zone));
                 /*
                  * Only reset the batches of zones that were actually
-                 * considered in the fast path, we don't want to
+                 * considered in the fairness pass, we don't want to
                  * trash fairness information for zones that are not
                  * actually part of this zonelist's round-robin cycle.
                  */
                 if (!zone_local(preferred_zone, zone))
                         continue;
                 mod_zone_page_state(zone, NR_ALLOC_BATCH,
-                                    high_wmark_pages(zone) -
-                                    low_wmark_pages(zone) -
-                                    zone_page_state(zone, NR_ALLOC_BATCH));
+                        high_wmark_pages(zone) - low_wmark_pages(zone) -
+                        atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
         }
 }
 
+static void wake_all_kswapds(unsigned int order,
+                             struct zonelist *zonelist,
+                             enum zone_type high_zoneidx,
+                             struct zone *preferred_zone)
+{
+        struct zoneref *z;
+        struct zone *zone;
+
+        for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+                wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+}
+
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
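The arithmetic in reset_alloc_batches() above can be read as "set NR_ALLOC_BATCH back to high_wmark - low_wmark by adding the difference to whatever is left, however far it has drifted". A minimal standalone sketch of just that identity, with plain longs and made-up watermark numbers standing in for the per-zone vmstat machinery:

#include <assert.h>
#include <stdio.h>

/* Models a batch counter that went negative during the fair pass. */
static long nr_alloc_batch = -37;

/* Models mod_zone_page_state(): add a signed delta to the counter. */
static void mod_counter(long *counter, long delta)
{
        *counter += delta;
}

int main(void)
{
        const long high_wmark = 1000, low_wmark = 750;   /* illustrative values */

        /* Same expression shape as in reset_alloc_batches() above. */
        mod_counter(&nr_alloc_batch,
                    high_wmark - low_wmark - nr_alloc_batch);

        assert(nr_alloc_batch == high_wmark - low_wmark);
        printf("batch reset to %ld pages\n", nr_alloc_batch);
        return 0;
}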
@@ -2523,12 +2506,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
          * allowed per node queues are empty and that nodes are
          * over allocated.
          */
-        if (gfp_thisnode_allocation(gfp_mask))
+        if (IS_ENABLED(CONFIG_NUMA) &&
+            (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
                 goto nopage;
 
 restart:
-        prepare_slowpath(gfp_mask, order, zonelist,
-                         high_zoneidx, preferred_zone);
+        if (!(gfp_mask & __GFP_NO_KSWAPD))
+                wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
 
         /*
          * OK, we're below the kswapd watermark and have kicked background
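The open-coded test that replaces gfp_thisnode_allocation() relies on GFP_THISNODE being a combination of bits (on NUMA kernels of this era, __GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY), so it must be an exact-match comparison rather than a single-bit test. A hedged sketch with placeholder bit values (the helper name and the numeric values below are made up; only the composition and the comparison mirror the hunk above):

#include <assert.h>
#include <stdbool.h>

/* Placeholder bit values for illustration only, not the kernel's. */
#define __GFP_NOWARN    0x1u
#define __GFP_NORETRY   0x2u
#define __GFP_THISNODE  0x4u
#define __GFP_WAIT      0x8u

/* Composite flag, as in NUMA kernels of this era. */
#define GFP_THISNODE    (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)

/* Mirrors the check added in __alloc_pages_slowpath() above. */
static bool is_opportunistic_thisnode(unsigned int gfp_mask)
{
        return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
}

int main(void)
{
        /* Opportunistic node-local attempt: all three bits set, bail early. */
        assert(is_opportunistic_thisnode(GFP_THISNODE));

        /* A caller that only pins the node but is willing to wait/retry. */
        assert(!is_opportunistic_thisnode(__GFP_THISNODE | __GFP_WAIT));

        return 0;
}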
@@ -2712,7 +2696,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
         struct page *page = NULL;
         int migratetype = allocflags_to_migratetype(gfp_mask);
         unsigned int cpuset_mems_cookie;
-        int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+        int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
         struct mem_cgroup *memcg = NULL;
 
         gfp_mask &= gfp_allowed_mask;
@@ -2753,12 +2737,29 @@ retry_cpuset:
         if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
                 alloc_flags |= ALLOC_CMA;
 #endif
+retry:
         /* First allocation attempt */
         page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                         zonelist, high_zoneidx, alloc_flags,
                         preferred_zone, migratetype);
         if (unlikely(!page)) {
                 /*
+                 * The first pass makes sure allocations are spread
+                 * fairly within the local node. However, the local
+                 * node might have free pages left after the fairness
+                 * batches are exhausted, and remote zones haven't
+                 * even been considered yet. Try once more without
+                 * fairness, and include remote zones now, before
+                 * entering the slowpath and waking kswapd: prefer
+                 * spilling to a remote zone over swapping locally.
+                 */
+                if (alloc_flags & ALLOC_FAIR) {
+                        reset_alloc_batches(zonelist, high_zoneidx,
+                                            preferred_zone);
+                        alloc_flags &= ~ALLOC_FAIR;
+                        goto retry;
+                }
+                /*
                  * Runtime PM, block IO and its error handling path
                  * can deadlock because I/O on the device might not
                  * complete.
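Putting the pieces together, the control flow added to __alloc_pages_nodemask() amounts to: one fair pass over the local zones, one pass over the whole zonelist with fairness disabled after resetting the local batches, and only then the kswapd-waking slowpath. A toy skeleton of that flow (every function below is a stand-in invented for illustration, not a kernel API):

#include <stdbool.h>
#include <stdio.h>

#define ALLOC_FAIR 0x100        /* same bit as in mm/internal.h above */

/* Pretend the fair pass fails because the local batches are exhausted. */
static bool try_freelist(int alloc_flags)
{
        return !(alloc_flags & ALLOC_FAIR);
}

/* Stand-in for reset_alloc_batches() on the local zones. */
static void reset_batches(void)
{
        printf("resetting local NR_ALLOC_BATCH counters\n");
}

static bool allocate(void)
{
        int alloc_flags = ALLOC_FAIR;   /* plus watermark/cpuset flags in the kernel */

retry:
        if (try_freelist(alloc_flags))
                return true;

        if (alloc_flags & ALLOC_FAIR) {
                /* Spill to remote zones before waking kswapd. */
                reset_batches();
                alloc_flags &= ~ALLOC_FAIR;
                goto retry;
        }

        /* Only reached once both passes have failed. */
        printf("entering slowpath: wake kswapds, reclaim, etc.\n");
        return false;
}

int main(void)
{
        allocate();
        return 0;
}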