author		Daniel Vetter <daniel.vetter@ffwll.ch>	2014-09-11 08:46:53 -0400
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2014-09-11 08:46:53 -0400
commit		336879b1da97fffc097f77c6d6f818660f2826f0 (patch)
tree		4ddb4d1c5d2b67fb096c72e41d2a03b01a605041 /mm/page_alloc.c
parent		3d3cbd84300e7be1e53083cac0f6f9c12978ecb4 (diff)
parent		fdcaa1dbb7c6ed419b10fb8cdb5001ab0a00538f (diff)

Merge remote-tracking branch 'airlied/drm-next' into topic/vblank-rework
Dave asked me to do the backmerge before sending him the revised pull
request, so here we go. Nothing fancy in the conflicts, just a few things
changed right next to each other.

Conflicts:
	drivers/gpu/drm/drm_irq.c

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	159
1 file changed, 83 insertions(+), 76 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef44ad736ca1..18cee0d4c8a2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -680,9 +680,12 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	int migratetype = 0;
 	int batch_free = 0;
 	int to_free = count;
+	unsigned long nr_scanned;
 
 	spin_lock(&zone->lock);
-	zone->pages_scanned = 0;
+	nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+	if (nr_scanned)
+		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
 	while (to_free) {
 		struct page *page;
@@ -731,8 +734,11 @@ static void free_one_page(struct zone *zone,
 				unsigned int order,
 				int migratetype)
 {
+	unsigned long nr_scanned;
 	spin_lock(&zone->lock);
-	zone->pages_scanned = 0;
+	nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+	if (nr_scanned)
+		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
 
 	__free_one_page(page, pfn, zone, order, migratetype);
 	if (unlikely(!is_migrate_isolate(migratetype)))
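
The two hunks above replace the direct zone->pages_scanned = 0 write with a read of the NR_PAGES_SCANNED vmstat counter followed by an equal negative delta, so per-zone and global accounting stay consistent. The standalone sketch below (plain C, not kernel code; the toy counter pair is invented for illustration) models that clear-by-delta pattern.

/*
 * Toy model of clearing a statistic by applying a negative delta instead
 * of zeroing a private field, so a paired global counter stays in sync.
 * Not kernel code; names are made up for the example.
 */
#include <stdio.h>

struct toy_zone {
	long pages_scanned;
};

static long global_pages_scanned;

static void mod_pages_scanned(struct toy_zone *z, long delta)
{
	z->pages_scanned += delta;
	global_pages_scanned += delta;	/* zone and global move together */
}

static void clear_pages_scanned(struct toy_zone *z)
{
	long nr_scanned = z->pages_scanned;

	if (nr_scanned)
		mod_pages_scanned(z, -nr_scanned);
}

int main(void)
{
	struct toy_zone zone = { 0 };

	mod_pages_scanned(&zone, 37);
	clear_pages_scanned(&zone);
	printf("zone=%ld global=%ld\n", zone.pages_scanned, global_pages_scanned);
	return 0;
}
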
@@ -1257,15 +1263,11 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
 	unsigned long flags;
-	int to_drain;
-	unsigned long batch;
+	int to_drain, batch;
 
 	local_irq_save(flags);
 	batch = ACCESS_ONCE(pcp->batch);
-	if (pcp->count >= batch)
-		to_drain = batch;
-	else
-		to_drain = pcp->count;
+	to_drain = min(pcp->count, batch);
 	if (to_drain > 0) {
 		free_pcppages_bulk(zone, to_drain, pcp);
 		pcp->count -= to_drain;
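
The hunk above collapses an open-coded clamp into min(pcp->count, batch). A minimal standalone check (plain C, not kernel code) that the old and new forms compute the same value:

/* Not part of the patch: verifies the if/else clamp equals min(). */
#include <assert.h>
#include <stdio.h>

static int old_to_drain(int count, int batch)
{
	int to_drain;

	if (count >= batch)
		to_drain = batch;
	else
		to_drain = count;
	return to_drain;
}

static int new_to_drain(int count, int batch)
{
	return count < batch ? count : batch;	/* min(count, batch) */
}

int main(void)
{
	for (int count = 0; count <= 300; count++)
		for (int batch = 0; batch <= 300; batch++)
			assert(old_to_drain(count, batch) ==
			       new_to_drain(count, batch));
	printf("old and new to_drain computations agree\n");
	return 0;
}
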
@@ -1610,6 +1612,9 @@ again:
 	}
 
 	__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+	if (zone_page_state(zone, NR_ALLOC_BATCH) == 0 &&
+	    !zone_is_fair_depleted(zone))
+		zone_set_flag(zone, ZONE_FAIR_DEPLETED);
 
 	__count_zone_vm_events(PGALLOC, zone, 1 << order);
 	zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1712,7 +1717,6 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
-	long lowmem_reserve = z->lowmem_reserve[classzone_idx];
 	int o;
 	long free_cma = 0;
 
@@ -1727,7 +1731,7 @@ static bool __zone_watermark_ok(struct zone *z, unsigned int order,
 		free_cma = zone_page_state(z, NR_FREE_CMA_PAGES);
 #endif
 
-	if (free_pages - free_cma <= min + lowmem_reserve)
+	if (free_pages - free_cma <= min + z->lowmem_reserve[classzone_idx])
 		return false;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
@@ -1922,6 +1926,18 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 
 #endif /* CONFIG_NUMA */
 
+static void reset_alloc_batches(struct zone *preferred_zone)
+{
+	struct zone *zone = preferred_zone->zone_pgdat->node_zones;
+
+	do {
+		mod_zone_page_state(zone, NR_ALLOC_BATCH,
+			high_wmark_pages(zone) - low_wmark_pages(zone) -
+			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
+		zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+	} while (zone++ != preferred_zone);
+}
+
 /*
  * get_page_from_freelist goes through the zonelist trying to allocate
  * a page.
@@ -1939,8 +1955,12 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
 	bool consider_zone_dirty = (alloc_flags & ALLOC_WMARK_LOW) &&
 				(gfp_mask & __GFP_WRITE);
+	int nr_fair_skipped = 0;
+	bool zonelist_rescan;
 
 zonelist_scan:
+	zonelist_rescan = false;
+
 	/*
 	 * Scan zonelist, looking for a zone with enough free.
 	 * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
@@ -1964,9 +1984,11 @@ zonelist_scan:
 		 */
 		if (alloc_flags & ALLOC_FAIR) {
 			if (!zone_local(preferred_zone, zone))
-				continue;
-			if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+				break;
+			if (zone_is_fair_depleted(zone)) {
+				nr_fair_skipped++;
 				continue;
+			}
 		}
 		/*
 		 * When allocating a page cache page for writing, we
@@ -2072,13 +2094,7 @@ this_zone_full:
 			zlc_mark_zone_full(zonelist, z);
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_NUMA) && page == NULL && zlc_active)) {
-		/* Disable zlc cache for second zonelist scan */
-		zlc_active = 0;
-		goto zonelist_scan;
-	}
-
-	if (page)
+	if (page) {
 		/*
 		 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
 		 * necessary to allocate the page. The expectation is
@@ -2087,8 +2103,37 @@ this_zone_full:
 		 * for !PFMEMALLOC purposes.
 		 */
 		page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
+		return page;
+	}
 
-	return page;
+	/*
+	 * The first pass makes sure allocations are spread fairly within the
+	 * local node. However, the local node might have free pages left
+	 * after the fairness batches are exhausted, and remote zones haven't
+	 * even been considered yet. Try once more without fairness, and
+	 * include remote zones now, before entering the slowpath and waking
+	 * kswapd: prefer spilling to a remote zone over swapping locally.
+	 */
+	if (alloc_flags & ALLOC_FAIR) {
+		alloc_flags &= ~ALLOC_FAIR;
+		if (nr_fair_skipped) {
+			zonelist_rescan = true;
+			reset_alloc_batches(preferred_zone);
+		}
+		if (nr_online_nodes > 1)
+			zonelist_rescan = true;
+	}
+
+	if (unlikely(IS_ENABLED(CONFIG_NUMA) && zlc_active)) {
+		/* Disable zlc cache for second zonelist scan */
+		zlc_active = 0;
+		zonelist_rescan = true;
+	}
+
+	if (zonelist_rescan)
+		goto zonelist_scan;
+
+	return NULL;
 }
 
 /*
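
The hunk above is the core of the change: the fairness pass now stays on the local node, counts depleted zones as skipped, and a second pass without ALLOC_FAIR (after resetting the batches) replaces the old retry loop in the caller. A heavily simplified userspace model of that two-pass scan follows; the zone names, page counts, and batch value of 2 are invented for illustration and the real policy lives in the kernel code shown in the patch.

/* Toy two-pass "fair then unfair" zone scan. Not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct fake_zone {
	const char *name;
	int free_pages;
	int alloc_batch;	/* fairness budget */
	bool local;		/* on the preferred node? */
};

static struct fake_zone zones[] = {
	{ "Normal (local)",  4, 2, true },
	{ "DMA32 (local)",   3, 1, true },
	{ "Normal (remote)", 8, 2, false },
};

static struct fake_zone *scan(bool fair)
{
	for (unsigned i = 0; i < sizeof(zones) / sizeof(zones[0]); i++) {
		struct fake_zone *z = &zones[i];

		if (fair) {
			if (!z->local)
				break;		/* fair pass stays local */
			if (z->alloc_batch <= 0)
				continue;	/* batch depleted: skip */
		}
		if (z->free_pages > 0) {
			z->free_pages--;
			if (fair)
				z->alloc_batch--;
			return z;
		}
	}
	return NULL;
}

static struct fake_zone *allocate(void)
{
	struct fake_zone *z = scan(true);	/* first pass: fair, local only */

	if (z)
		return z;
	for (unsigned i = 0; i < sizeof(zones) / sizeof(zones[0]); i++)
		if (zones[i].local)
			zones[i].alloc_batch = 2;	/* reset_alloc_batches() analogue */
	return scan(false);			/* second pass: no fairness */
}

int main(void)
{
	for (int i = 0; i < 10; i++) {
		struct fake_zone *z = allocate();

		printf("alloc %2d -> %s\n", i, z ? z->name : "(failed)");
	}
	return 0;
}
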
@@ -2201,8 +2246,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 {
 	struct page *page;
 
-	/* Acquire the OOM killer lock for the zones in zonelist */
-	if (!try_set_zonelist_oom(zonelist, gfp_mask)) {
+	/* Acquire the per-zone oom lock for each zone */
+	if (!oom_zonelist_trylock(zonelist, gfp_mask)) {
 		schedule_timeout_uninterruptible(1);
 		return NULL;
 	}
@@ -2240,7 +2285,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		out_of_memory(zonelist, gfp_mask, order, nodemask, false);
 
 out:
-	clear_zonelist_oom(zonelist, gfp_mask);
+	oom_zonelist_unlock(zonelist, gfp_mask);
 	return page;
 }
 
@@ -2409,28 +2454,6 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
 	return page;
 }
 
-static void reset_alloc_batches(struct zonelist *zonelist,
-				enum zone_type high_zoneidx,
-				struct zone *preferred_zone)
-{
-	struct zoneref *z;
-	struct zone *zone;
-
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-		/*
-		 * Only reset the batches of zones that were actually
-		 * considered in the fairness pass, we don't want to
-		 * trash fairness information for zones that are not
-		 * actually part of this zonelist's round-robin cycle.
-		 */
-		if (!zone_local(preferred_zone, zone))
-			continue;
-		mod_zone_page_state(zone, NR_ALLOC_BATCH,
-			high_wmark_pages(zone) - low_wmark_pages(zone) -
-			atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-	}
-}
-
 static void wake_all_kswapds(unsigned int order,
 			     struct zonelist *zonelist,
 			     enum zone_type high_zoneidx,
@@ -2616,14 +2639,6 @@ rebalance:
 		goto got_pg;
 
 	/*
-	 * It can become very expensive to allocate transparent hugepages at
-	 * fault, so use asynchronous memory compaction for THP unless it is
-	 * khugepaged trying to collapse.
-	 */
-	if (!(gfp_mask & __GFP_NO_KSWAPD) || (current->flags & PF_KTHREAD))
-		migration_mode = MIGRATE_SYNC_LIGHT;
-
-	/*
 	 * If compaction is deferred for high-order allocations, it is because
 	 * sync compaction recently failed. In this is the case and the caller
 	 * requested a movable allocation that does not heavily disrupt the
@@ -2633,6 +2648,15 @@ rebalance:
 						(gfp_mask & __GFP_NO_KSWAPD))
 		goto nopage;
 
+	/*
+	 * It can become very expensive to allocate transparent hugepages at
+	 * fault, so use asynchronous memory compaction for THP unless it is
+	 * khugepaged trying to collapse.
+	 */
+	if ((gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE ||
+						(current->flags & PF_KTHREAD))
+		migration_mode = MIGRATE_SYNC_LIGHT;
+
 	/* Try direct reclaim and then allocating */
 	page = __alloc_pages_direct_reclaim(gfp_mask, order,
 					zonelist, high_zoneidx,
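
The re-added THP check above now tests (gfp_mask & GFP_TRANSHUGE) != GFP_TRANSHUGE, which only treats an allocation as THP when every bit of the multi-bit mask is present, unlike the old single-flag test on __GFP_NO_KSWAPD. A small standalone demonstration of that mask idiom (plain C; the flag values are made up):

/* Not part of the patch: (flags & MASK) != MASK vs. missing bits. */
#include <stdio.h>

#define FAKE_GFP_MOVABLE	0x1u
#define FAKE_GFP_COMP		0x2u
#define FAKE_GFP_NO_KSWAPD	0x4u
#define FAKE_GFP_TRANSHUGE	(FAKE_GFP_MOVABLE | FAKE_GFP_COMP | FAKE_GFP_NO_KSWAPD)

int main(void)
{
	unsigned int thp_alloc = FAKE_GFP_TRANSHUGE;
	unsigned int partial   = FAKE_GFP_MOVABLE | FAKE_GFP_NO_KSWAPD;

	/* All mask bits set: the "!= MASK" test is false, so it counts as THP. */
	printf("thp_alloc treated as THP: %s\n",
	       (thp_alloc & FAKE_GFP_TRANSHUGE) != FAKE_GFP_TRANSHUGE ? "no" : "yes");

	/* Only some bits set: not treated as a THP allocation. */
	printf("partial treated as THP:   %s\n",
	       (partial & FAKE_GFP_TRANSHUGE) != FAKE_GFP_TRANSHUGE ? "no" : "yes");
	return 0;
}
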
@@ -2766,29 +2790,12 @@ retry_cpuset:
 	if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 #endif
-retry:
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, alloc_flags,
 			preferred_zone, classzone_idx, migratetype);
 	if (unlikely(!page)) {
 		/*
-		 * The first pass makes sure allocations are spread
-		 * fairly within the local node. However, the local
-		 * node might have free pages left after the fairness
-		 * batches are exhausted, and remote zones haven't
-		 * even been considered yet. Try once more without
-		 * fairness, and include remote zones now, before
-		 * entering the slowpath and waking kswapd: prefer
-		 * spilling to a remote zone over swapping locally.
-		 */
-		if (alloc_flags & ALLOC_FAIR) {
-			reset_alloc_batches(zonelist, high_zoneidx,
-						preferred_zone);
-			alloc_flags &= ~ALLOC_FAIR;
-			goto retry;
-		}
-		/*
 		 * Runtime PM, block IO and its error handling path
 		 * can deadlock because I/O on the device might not
 		 * complete.
@@ -2962,7 +2969,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
  * Note this is not alloc_pages_exact_node() which allocates on a specific node,
  * but is not exact.
  */
-void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
+void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 {
 	unsigned order = get_order(size);
 	struct page *p = alloc_pages_node(nid, gfp_mask, order);
@@ -2970,7 +2977,6 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
 		return NULL;
 	return make_alloc_exact((unsigned long)page_address(p), order, size);
 }
-EXPORT_SYMBOL(alloc_pages_exact_nid);
 
 /**
  * free_pages_exact - release memory allocated via alloc_pages_exact()
@@ -3052,7 +3058,7 @@ static inline void show_node(struct zone *zone)
 void si_meminfo(struct sysinfo *val)
 {
 	val->totalram = totalram_pages;
-	val->sharedram = 0;
+	val->sharedram = global_page_state(NR_SHMEM);
 	val->freeram = global_page_state(NR_FREE_PAGES);
 	val->bufferram = nr_blockdev_pages();
 	val->totalhigh = totalhigh_pages;
@@ -3072,6 +3078,7 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
 		managed_pages += pgdat->node_zones[zone_type].managed_pages;
 	val->totalram = managed_pages;
+	val->sharedram = node_page_state(nid, NR_SHMEM);
 	val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
 	val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
@@ -3253,12 +3260,12 @@ void show_free_areas(unsigned int filter)
 			K(zone_page_state(zone, NR_BOUNCE)),
 			K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
-			zone->pages_scanned,
+			K(zone_page_state(zone, NR_PAGES_SCANNED)),
 			(!zone_reclaimable(zone) ? "yes" : "no")
 			);
 		printk("lowmem_reserve[]:");
 		for (i = 0; i < MAX_NR_ZONES; i++)
-			printk(" %lu", zone->lowmem_reserve[i]);
+			printk(" %ld", zone->lowmem_reserve[i]);
 		printk("\n");
 	}
 
@@ -5579,7 +5586,7 @@ static void calculate_totalreserve_pages(void)
 	for_each_online_pgdat(pgdat) {
 		for (i = 0; i < MAX_NR_ZONES; i++) {
 			struct zone *zone = pgdat->node_zones + i;
-			unsigned long max = 0;
+			long max = 0;
 
 			/* Find valid and maximum lowmem_reserve in the zone */
 			for (j = i; j < MAX_NR_ZONES; j++) {