Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  118
1 file changed, 61 insertions(+), 57 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 979378deccbf..5dba2933c9c0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -295,7 +295,8 @@ static inline int bad_range(struct zone *zone, struct page *page)
 }
 #endif
 
-static void bad_page(struct page *page, char *reason, unsigned long bad_flags)
+static void bad_page(struct page *page, const char *reason,
+		unsigned long bad_flags)
 {
 	static unsigned long resume;
 	static unsigned long nr_shown;
@@ -623,7 +624,7 @@ out:
 
 static inline int free_pages_check(struct page *page)
 {
-	char *bad_reason = NULL;
+	const char *bad_reason = NULL;
 	unsigned long bad_flags = 0;
 
 	if (unlikely(page_mapcount(page)))
@@ -859,7 +860,7 @@ static inline void expand(struct zone *zone, struct page *page,
  */
 static inline int check_new_page(struct page *page)
 {
-	char *bad_reason = NULL;
+	const char *bad_reason = NULL;
 	unsigned long bad_flags = 0;
 
 	if (unlikely(page_mapcount(page)))
@@ -1238,15 +1239,6 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	}
 	local_irq_restore(flags);
 }
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-	return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
-}
-#else
-static bool gfp_thisnode_allocation(gfp_t gfp_mask)
-{
-	return false;
-}
 #endif
 
 /*
@@ -1583,12 +1575,7 @@ again:
 					  get_pageblock_migratetype(page));
 	}
 
-	/*
-	 * NOTE: GFP_THISNODE allocations do not partake in the kswapd
-	 * aging protocol, so they can't be fair.
-	 */
-	if (!gfp_thisnode_allocation(gfp_flags))
-		__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1870,7 +1857,7 @@ static void __paginginit init_zone_allows_reclaim(int nid)
 {
 	int i;
 
-	for_each_online_node(i)
+	for_each_node_state(i, N_MEMORY)
 		if (node_distance(nid, i) <= RECLAIM_DISTANCE)
 			node_set(i, NODE_DATA(nid)->reclaim_nodes);
 		else
@@ -1954,23 +1941,12 @@ zonelist_scan:
 		 * zone size to ensure fair page aging. The zone a
 		 * page was allocated in should have no effect on the
 		 * time the page has in memory before being reclaimed.
-		 *
-		 * Try to stay in local zones in the fastpath. If
-		 * that fails, the slowpath is entered, which will do
-		 * another pass starting with the local zones, but
-		 * ultimately fall back to remote zones that do not
-		 * partake in the fairness round-robin cycle of this
-		 * zonelist.
-		 *
-		 * NOTE: GFP_THISNODE allocations do not partake in
-		 * the kswapd aging protocol, so they can't be fair.
 		 */
-		if ((alloc_flags & ALLOC_WMARK_LOW) &&
-		    !gfp_thisnode_allocation(gfp_mask)) {
-			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
-				continue;
+		if (alloc_flags & ALLOC_FAIR) {
 			if (!zone_local(preferred_zone, zone))
 				continue;
+			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+				continue;
 		}
 		/*
 		 * When allocating a page cache page for writing, we
@@ -2408,32 +2384,40 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 	return page;
 }
 
-static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
-			     struct zonelist *zonelist,
-			     enum zone_type high_zoneidx,
-			     struct zone *preferred_zone)
+static void reset_alloc_batches(struct zonelist *zonelist,
+				enum zone_type high_zoneidx,
+				struct zone *preferred_zone)
 {
 	struct zoneref *z;
 	struct zone *zone;
 
 	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-		if (!(gfp_mask & __GFP_NO_KSWAPD))
-			wakeup_kswapd(zone, order, zone_idx(preferred_zone));
 		/*
 		 * Only reset the batches of zones that were actually
-		 * considered in the fast path, we don't want to
-		 * thrash fairness information for zones that are not
+		 * considered in the fairness pass, we don't want to
+		 * trash fairness information for zones that are not
 		 * actually part of this zonelist's round-robin cycle.
 		 */
 		if (!zone_local(preferred_zone, zone))
 			continue;
 		mod_zone_page_state(zone, NR_ALLOC_BATCH,
-				    high_wmark_pages(zone) -
-				    low_wmark_pages(zone) -
-				    zone_page_state(zone, NR_ALLOC_BATCH));
+			high_wmark_pages(zone) - low_wmark_pages(zone) -
+			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
 	}
 }
 
+static void wake_all_kswapds(unsigned int order,
+			     struct zonelist *zonelist,
+			     enum zone_type high_zoneidx,
+			     struct zone *preferred_zone)
+{
+	struct zoneref *z;
+	struct zone *zone;
+
+	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+		wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+}
+
 static inline int
 gfp_to_alloc_flags(gfp_t gfp_mask)
 {
@@ -2522,12 +2506,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * allowed per node queues are empty and that nodes are
 	 * over allocated.
 	 */
-	if (gfp_thisnode_allocation(gfp_mask))
+	if (IS_ENABLED(CONFIG_NUMA) &&
+	    (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
 restart:
-	prepare_slowpath(gfp_mask, order, zonelist,
-			 high_zoneidx, preferred_zone);
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
 
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2711,7 +2696,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
-	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
+	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
 	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
@@ -2752,12 +2737,29 @@ retry_cpuset:
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 #endif
+retry:
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, migratetype);
 	if (unlikely(!page)) {
 		/*
+		 * The first pass makes sure allocations are spread
+		 * fairly within the local node. However, the local
+		 * node might have free pages left after the fairness
+		 * batches are exhausted, and remote zones haven't
+		 * even been considered yet. Try once more without
+		 * fairness, and include remote zones now, before
+		 * entering the slowpath and waking kswapd: prefer
+		 * spilling to a remote zone over swapping locally.
+		 */
+		if (alloc_flags & ALLOC_FAIR) {
+			reset_alloc_batches(zonelist, high_zoneidx,
+					    preferred_zone);
+			alloc_flags &= ~ALLOC_FAIR;
+			goto retry;
+		}
+		/*
 		 * Runtime PM, block IO and its error handling path
 		 * can deadlock because I/O on the device might not
 		 * complete.
@@ -4919,7 +4921,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
-	init_zone_allows_reclaim(nid);
+	if (node_state(nid, N_MEMORY))
+		init_zone_allows_reclaim(nid);
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 #endif
@@ -5070,7 +5073,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 	nodemask_t saved_node_state = node_states[N_MEMORY];
 	unsigned long totalpages = early_calculate_totalpages();
 	int usable_nodes = nodes_weight(node_states[N_MEMORY]);
-	struct memblock_type *type = &memblock.memory;
+	struct memblock_region *r;
 
 	/* Need to find movable_zone earlier when movable_node is specified. */
 	find_usable_zone_for_movable();
@@ -5080,13 +5083,13 @@ static void __init find_zone_movable_pfns_for_nodes(void)
 	 * options.
 	 */
 	if (movable_node_is_enabled()) {
-		for (i = 0; i < type->cnt; i++) {
-			if (!memblock_is_hotpluggable(&type->regions[i]))
+		for_each_memblock(memory, r) {
+			if (!memblock_is_hotpluggable(r))
 				continue;
 
-			nid = type->regions[i].nid;
+			nid = r->nid;
 
-			usable_startpfn = PFN_DOWN(type->regions[i].base);
+			usable_startpfn = PFN_DOWN(r->base);
 			zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
 				min(usable_startpfn, zone_movable_pfn[nid]) :
 				usable_startpfn;
@@ -6544,7 +6547,8 @@ static void dump_page_flags(unsigned long flags)
 	printk(")\n");
 }
 
-void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
+void dump_page_badflags(struct page *page, const char *reason,
+		unsigned long badflags)
 {
 	printk(KERN_ALERT
 	       "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
@@ -6560,8 +6564,8 @@ void dump_page_badflags(struct page *page, char *reason, unsigned long badflags)
 	mem_cgroup_print_bad_page(page);
 }
 
-void dump_page(struct page *page, char *reason)
+void dump_page(struct page *page, const char *reason)
 {
 	dump_page_badflags(page, reason, 0);
 }
-EXPORT_SYMBOL_GPL(dump_page);
+EXPORT_SYMBOL(dump_page);