Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 118
1 file changed, 61 insertions, 57 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 979378deccbf..5dba2933c9c0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -295,7 +295,8 @@ static inline int bad_range(struct zone *zone, struct page *page)
 }
 #endif
 
-static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
+static void bad_page(struct page *page, const char *reason,
+                unsigned long bad_flags)
 {
         static unsigned long resume;
         static unsigned long nr_shown;
@@ -623,7 +624,7 @@ out:
 
 static inline int free_pages_check(struct page *page)
 {
-        char *bad_reason = NULL;
+        const char *bad_reason = NULL;
         unsigned long bad_flags = 0;
 
         if (unlikely(page_mapcount(page)))
@@ -859,7 +860,7 @@ static inline void expand(struct zone *zone, struct page *page,
  */
 static inline int check_new_page(struct page *page)
 {
-        char *bad_reason = NULL;
+        const char *bad_reason = NULL;
         unsigned long bad_flags = 0;
 
         if (unlikely(page_mapcount(page)))
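The hunks above (and the matching dump_page() ones at the end of this diff) only const-qualify the reason strings: callers hand in string literals, so receiving them as const char * is the honest prototype. Below is a minimal standalone sketch of that point, compiled in user space; report_bad_page() is a made-up stand-in, not the kernel's bad_page().

/*
 * Standalone illustration (not kernel code): callers pass string
 * literals as the "reason", so the callee should take const char *.
 */
#include <stdio.h>

static void report_bad_page(void *page, const char *reason,
                            unsigned long bad_flags)
{
        /* The callee only reads the string, so const is the honest type. */
        printf("page:%p bad because: %s (flags %#lx)\n", page, reason, bad_flags);
}

int main(void)
{
        int dummy_page;

        /* A string literal converts cleanly to const char *; with a plain
         * char * parameter this would warn under -Wwrite-strings. */
        report_bad_page(&dummy_page, "nonzero mapcount", 0UL);
        return 0;
}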
@@ -1238,15 +1239,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
         }
         local_irq_restore(flags);
 }
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-        return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
-}
-#else
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-        return false;
-}
 #endif
 
 /*
@@ -1583,12 +1575,7 @@ again:
                                   get_pageblock_migratetype(page));
         }
 
-        /*
-         * NOTE: GFP_THISNODE allocations do not partake in the kswapd
-         * aging protocol, so they can't be fair.
-         */
-        if (!gfp_thisnode_allocation(gfp_flags))
-                __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+        __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
 
         __count_zone_vm_events(PGALLOC, zone, 1 << order);
         zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1870,7 +1857,7 @@ static void __paginginit init_zone_allows_reclaim(int nid)
 {
         int i;
 
-        for_each_online_node(i)
+        for_each_node_state(i, N_MEMORY)
                 if (node_distance(nid, i) <= RECLAIM_DISTANCE)
                         node_set(i, NODE_DATA(nid)->reclaim_nodes);
                 else
@@ -1954,23 +1941,12 @@ zonelist_scan:
                  * zone size to ensure fair page aging. The zone a
                  * page was allocated in should have no effect on the
                  * time the page has in memory before being reclaimed.
-                 *
-                 * Try to stay in local zones in the fastpath. If
-                 * that fails, the slowpath is entered, which will do
-                 * another pass starting with the local zones, but
-                 * ultimately fall back to remote zones that do not
-                 * partake in the fairness round-robin cycle of this
-                 * zonelist.
-                 *
-                 * NOTE: GFP_THISNODE allocations do not partake in
-                 * the kswapd aging protocol, so they can't be fair.
                  */
-                if ((alloc_flags & ALLOC_WMARK_LOW) &&
-                    !gfp_thisnode_allocation(gfp_mask)) {
-                        if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
-                                continue;
+                if (alloc_flags & ALLOC_FAIR) {
                         if (!zone_local(preferred_zone, zone))
                                 continue;
+                        if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+                                continue;
                 }
                 /*
                  * When allocating a page cache page for writing, we
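The check above is the fast-path half of the NR_ALLOC_BATCH round-robin: under ALLOC_FAIR only zones local to the preferred node are eligible, and only while their allocation batch is still positive (the zone_local() test now runs before the counter is read, so remote zones are skipped without touching their vmstat). Here is a user-space model of that selection loop, with made-up types (struct fake_zone and friends) rather than the kernel's zonelist iterators.

/*
 * User-space model of the ALLOC_FAIR pass (made-up types, not kernel
 * code): only local zones with batch left are eligible, so allocations
 * spread over the local zones in proportion to their batch sizes.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_zone {
        const char *name;
        bool local;             /* on the preferred node? */
        long alloc_batch;       /* stands in for NR_ALLOC_BATCH */
        long free_pages;
};

static struct fake_zone *fair_pick(struct fake_zone *zones, int nr, int order)
{
        for (int i = 0; i < nr; i++) {
                struct fake_zone *z = &zones[i];

                if (!z->local)                  /* remote zones: skip in fair pass */
                        continue;
                if (z->alloc_batch <= 0)        /* batch exhausted: skip */
                        continue;
                if (z->free_pages < (1L << order))
                        continue;
                z->alloc_batch -= 1L << order;  /* charge the batch */
                z->free_pages -= 1L << order;
                return z;
        }
        return NULL;    /* caller would retry without fairness, then slowpath */
}

int main(void)
{
        struct fake_zone zones[] = {
                { "Normal",  true,  4, 64 },
                { "DMA32",   true,  2, 64 },
                { "Remote0", false, 8, 64 },
        };

        for (int i = 0; i < 8; i++) {
                struct fake_zone *z = fair_pick(zones, 3, 0);
                printf("alloc %d -> %s\n", i, z ? z->name : "(fair pass failed)");
        }
        return 0;
}

Run long enough, the local zones are charged until their batches drain and the fair pass starts failing, at which point the caller has to fall back (see the retry logic further down in this diff).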
@@ -2408,32 +2384,40 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
         return page;
 }
 
-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
-                             struct zonelist *zonelist,
-                             enum zone_type high_zoneidx,
-                             struct zone *preferred_zone)
+static void reset_alloc_batches(struct zonelist *zonelist,
+                                enum zone_type high_zoneidx,
+                                struct zone *preferred_zone)
 {
         struct zoneref *z;
         struct zone *zone;
 
         for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-                if (!(gfp_mask & __GFP_NO_KSWAPD))
-                        wakeup_kswapd(zone, order, zone_idx(preferred_zone));
                 /*
                  * Only reset the batches of zones that were actually
-                 * considered in the fast path, we don't want to
-                 * thrash fairness information for zones that are not
+                 * considered in the fairness pass, we don't want to
+                 * trash fairness information for zones that are not
                  * actually part of this zonelist's round-robin cycle.
                  */
                 if (!zone_local(preferred_zone, zone))
                         continue;
                 mod_zone_page_state(zone, NR_ALLOC_BATCH,
-                        high_wmark_pages(zone) -
-                        low_wmark_pages(zone) -
-                        zone_page_state(zone, NR_ALLOC_BATCH));
+                        high_wmark_pages(zone) - low_wmark_pages(zone) -
+                        atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
         }
 }
 
+static void wake_all_kswapds(unsigned int order,
+                             struct zonelist *zonelist,
+                             enum zone_type high_zoneidx,
+                             struct zone *preferred_zone)
+{
+        struct zoneref *z;
+        struct zone *zone;
+
+        for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+                wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+}
+
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
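prepare_slowpath() used to wake kswapd and re-arm the batches in one pass; splitting it lets the batch reset be called on its own from the fast-path retry added later in this diff, while wake_all_kswapds() keeps the wakeup side. The delta handed to mod_zone_page_state() re-arms the counter to high_wmark - low_wmark whatever its current value; the switch to a raw atomic_long_read() is presumably because zone_page_state() clamps negative counters to zero on SMP, which would under-correct a batch that has gone negative. A worked example with illustrative watermark numbers (not real zone sizes):

/*
 * Worked example of the batch reset (illustrative numbers only):
 * mod_zone_page_state() adds a delta, so passing
 *   high_wmark - low_wmark - current
 * always lands the counter back on high_wmark - low_wmark.
 */
#include <stdio.h>

int main(void)
{
        long high_wmark = 1000; /* assumed watermark, in pages */
        long low_wmark  = 800;
        long batch      = -50;  /* the counter may have gone negative */

        long delta = high_wmark - low_wmark - batch;    /* 1000 - 800 - (-50) = 250 */

        batch += delta;         /* what mod_zone_page_state(..., delta) does */
        printf("delta=%ld, batch re-armed to %ld\n", delta, batch);     /* 250, 200 */
        return 0;
}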
@@ -2522,12 +2506,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
          * allowed per node queues are empty and that nodes are
          * over allocated.
          */
-        if (gfp_thisnode_allocation(gfp_mask))
+        if (IS_ENABLED(CONFIG_NUMA) &&
+            (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
                 goto nopage;
 
 restart:
-        prepare_slowpath(gfp_mask, order, zonelist,
-                         high_zoneidx, preferred_zone);
+        if (!(gfp_mask & __GFP_NO_KSWAPD))
+                wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
 
         /*
          * OK, we're below the kswapd watermark and have kicked background
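The open-coded test above checks that every bit of the GFP_THISNODE composite is set, not merely some of them; in kernels of this vintage GFP_THISNODE expands to __GFP_THISNODE | __GFP_NORETRY | __GFP_NOWARN on NUMA builds and to 0 otherwise, which is why the IS_ENABLED(CONFIG_NUMA) guard is needed (with a zero mask the comparison would be true for every allocation). A small sketch of the mask-subset idiom; the flag values below are made up for the demo, not the real __GFP_* constants.

/*
 * The "(mask & COMPOSITE) == COMPOSITE" idiom tests that every bit of
 * the composite is set.  Demo flag values, not real __GFP_* constants.
 */
#include <stdio.h>

#define DEMO_THISNODE   0x01u
#define DEMO_NORETRY    0x02u
#define DEMO_NOWARN     0x04u
#define DEMO_COMPOSITE  (DEMO_THISNODE | DEMO_NORETRY | DEMO_NOWARN)

static int is_composite(unsigned int mask)
{
        return (mask & DEMO_COMPOSITE) == DEMO_COMPOSITE;
}

int main(void)
{
        printf("%d\n", is_composite(DEMO_THISNODE));            /* 0: one bit is not enough */
        printf("%d\n", is_composite(DEMO_COMPOSITE));           /* 1 */
        printf("%d\n", is_composite(DEMO_COMPOSITE | 0x100u));  /* 1: extra bits don't matter */
        return 0;
}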
@@ -2711,7 +2696,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
         struct page *page = NULL;
         int migratetype = allocflags_to_migratetype(gfp_mask);
         unsigned int cpuset_mems_cookie;
-        int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+        int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
         struct mem_cgroup *memcg = NULL;
 
         gfp_mask &= gfp_allowed_mask;
@@ -2752,12 +2737,29 @@ retry_cpuset:
         if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
                 alloc_flags |= ALLOC_CMA;
 #endif
+retry:
         /* First allocation attempt */
         page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
                         zonelist, high_zoneidx, alloc_flags,
                         preferred_zone, migratetype);
         if (unlikely(!page)) {
                 /*
+                 * The first pass makes sure allocations are spread
+                 * fairly within the local node. However, the local
+                 * node might have free pages left after the fairness
+                 * batches are exhausted, and remote zones haven't
+                 * even been considered yet. Try once more without
+                 * fairness, and include remote zones now, before
+                 * entering the slowpath and waking kswapd: prefer
+                 * spilling to a remote zone over swapping locally.
+                 */
+                if (alloc_flags & ALLOC_FAIR) {
+                        reset_alloc_batches(zonelist, high_zoneidx,
+                                            preferred_zone);
+                        alloc_flags &= ~ALLOC_FAIR;
+                        goto retry;
+                }
+                /*
                  * Runtime PM, block IO and its error handling path
                  * can deadlock because I/O on the device might not
                  * complete.
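Taken together with the ALLOC_FAIR default added to __alloc_pages_nodemask() above, the fast path now runs at most twice: a fair, local-only pass; then, if that comes back empty, reset_alloc_batches() re-arms the counters, ALLOC_FAIR is dropped, and the same zonelist is walked again with remote zones included, before kswapd is woken or the slowpath is entered. A control-flow sketch with stub functions and made-up names, not the kernel API:

/*
 * Control-flow sketch of the fast-path retry added above (stubs only).
 */
#include <stdio.h>

#define ALLOC_FAIR      0x1
#define ALLOC_WMARK_LOW 0x2

static void *try_freelists(int alloc_flags)
{
        /* Pretend the fair, local-only pass finds nothing. */
        return (alloc_flags & ALLOC_FAIR) ? NULL : "page";
}

static void reset_batches(void)
{
        puts("re-arming NR_ALLOC_BATCH on local zones");
}

static void *allocate(void)
{
        int alloc_flags = ALLOC_WMARK_LOW | ALLOC_FAIR;
        void *page;

retry:
        page = try_freelists(alloc_flags);
        if (!page) {
                if (alloc_flags & ALLOC_FAIR) {
                        /* Batches exhausted: include remote zones before
                         * waking kswapd or entering the slowpath. */
                        reset_batches();
                        alloc_flags &= ~ALLOC_FAIR;
                        goto retry;
                }
                puts("would enter the slowpath here");
        }
        return page;
}

int main(void)
{
        printf("result: %s\n", (const char *)allocate());
        return 0;
}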
@@ -4919,7 +4921,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
         pgdat->node_id = nid;
         pgdat->node_start_pfn = node_start_pfn;
-        init_zone_allows_reclaim(nid);
+        if (node_state(nid, N_MEMORY))
+                init_zone_allows_reclaim(nid);
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
         get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 #endif
@@ -5070,7 +5073,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
         nodemask_t saved_node_state = node_states[N_MEMORY];
         unsigned long totalpages = early_calculate_totalpages();
         int usable_nodes = nodes_weight(node_states[N_MEMORY]);
-        struct memblock_type *type = &memblock.memory;
+        struct memblock_region *r;
 
         /* Need to find movable_zone earlier when movable_node is specified. */
         find_usable_zone_for_movable();
@@ -5080,13 +5083,13 @@ static void __init find_zone_movable_pfns_for_nodes(void)
          * options.
          */
         if (movable_node_is_enabled()) {
-                for (i = 0; i < type->cnt; i++) {
-                        if (!memblock_is_hotpluggable(&type->regions[i]))
+                for_each_memblock(memory, r) {
+                        if (!memblock_is_hotpluggable(r))
                                 continue;
 
-                        nid = type->regions[i].nid;
+                        nid = r->nid;
 
-                        usable_startpfn = PFN_DOWN(type->regions[i].base);
+                        usable_startpfn = PFN_DOWN(r->base);
                         zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
                                 min(usable_startpfn, zone_movable_pfn[nid]) :
                                 usable_startpfn;
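This hunk trades index arithmetic over memblock.memory.regions[] for the for_each_memblock() iterator, which keeps a struct memblock_region pointer in scope and drops the struct memblock_type plumbing. A simplified, self-contained sketch of the same iterator-macro pattern follows; the types and the for_each_demo_region() macro are illustrative stand-ins, not the real memblock API.

/*
 * Simplified stand-in for the for_each_memblock() pattern: iterate a
 * region array with a macro instead of open-coded index arithmetic.
 */
#include <stdio.h>

struct demo_region {
        unsigned long base;
        unsigned long size;
        int nid;
        int hotpluggable;
};

struct demo_type {
        unsigned long cnt;
        struct demo_region *regions;
};

#define for_each_demo_region(r, type) \
        for ((r) = (type)->regions; (r) < (type)->regions + (type)->cnt; (r)++)

int main(void)
{
        struct demo_region regions[] = {
                { 0x00000000, 0x40000000, 0, 0 },
                { 0x40000000, 0x40000000, 1, 1 },       /* hotpluggable */
        };
        struct demo_type memory = { 2, regions };
        struct demo_region *r;

        for_each_demo_region(r, &memory) {
                if (!r->hotpluggable)
                        continue;
                printf("node %d movable from base %#lx\n", r->nid, r->base);
        }
        return 0;
}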
@@ -6544,7 +6547,8 @@ static void dump_page_flags(unsigned long flags)
         printk(")\n");
 }
 
-void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
+void dump_page_badflags(struct page *page, const char *reason,
+                unsigned long badflags)
 {
         printk(KERN_ALERT
                "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
@@ -6560,8 +6564,8 @@ void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
         mem_cgroup_print_bad_page(page);
 }
 
-void dump_page(struct page *page, char *reason)
+void dump_page(struct page *page, const char *reason)
 {
         dump_page_badflags(page, reason, 0);
 }
-EXPORT_SYMBOL_GPL(dump_page);
+EXPORT_SYMBOL(dump_page);