4 files changed, 132 insertions, 65 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index e78cb9688421..7fcd3a52e68d 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -51,6 +51,47 @@ static inline bool migrate_async_suitable(int migratetype)
 }
 /*
+ * Compaction requires the taking of some coarse locks that are potentially
+ * very heavily contended. Check if the process needs to be scheduled or
+ * if the lock is contended. For async compaction, back out if the lock
+ * is contended or a reschedule is needed. For sync compaction, schedule.
+ *
+ * Returns true if the lock is held.
+ * Returns false if the lock is released and compaction should abort
+ */
+static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
+                                      bool locked, struct compact_control *cc)
+{
+        if (need_resched() || spin_is_contended(lock)) {
+                if (locked) {
+                        spin_unlock_irqrestore(lock, *flags);
+                        locked = false;
+                }
+                /* async aborts if taking too long or contended */
+                if (!cc->sync) {
+                        if (cc->contended)
+                                *cc->contended = true;
+                        return false;
+                }
+                cond_resched();
+                if (fatal_signal_pending(current))
+                        return false;
+        }
+        if (!locked)
+                spin_lock_irqsave(lock, *flags);
+        return true;
+}
+static inline bool compact_trylock_irqsave(spinlock_t *lock,
+                        unsigned long *flags, struct compact_control *cc)
+{
+        return compact_checklock_irqsave(lock, flags, false, cc);
+}
+/*
 * Isolate free pages onto a private freelist. Caller must hold zone->lock.
 * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
 * pages inside of the pageblock (even though it may still end up isolating
@@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
 }
 /* Update the number of anon and file isolated pages in the zone */
-static void acct_isolated(struct zone *zone, struct compact_control *cc)
+static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
 {
        struct page *page;
        unsigned int count[2] = { 0, };
@@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
        list_for_each_entry(page, &cc->migratepages, lru)
                count[!!page_is_file_cache(page)]++;
-        __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+        /* If locked we can use the interrupt unsafe versions */
-        __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+        if (locked) {
+                __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+                __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+        } else {
+                mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
+                mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+        }
 }
 /* Similar to reclaim, but different enough that they don't share logic */
@@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
        struct list_head *migratelist = &cc->migratepages;
        isolate_mode_t mode = 0;
        struct lruvec *lruvec;
+        unsigned long flags;
+        bool locked;
        /*
         * Ensure that there are not too many pages isolated from the LRU
@@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
        /* Time to isolate some pages for migration */
        cond_resched();
-        spin_lock_irq(&zone->lru_lock);
+        spin_lock_irqsave(&zone->lru_lock, flags);
+        locked = true;
        for (; low_pfn < end_pfn; low_pfn++) {
                struct page *page;
-                bool locked = true;
                /* give a chance to irqs before checking need_resched() */
                if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
-                        spin_unlock_irq(&zone->lru_lock);
+                        spin_unlock_irqrestore(&zone->lru_lock, flags);
                        locked = false;
                }
-                if (need_resched() || spin_is_contended(&zone->lru_lock)) {
-                        if (locked)
+                /* Check if it is ok to still hold the lock */
-                                spin_unlock_irq(&zone->lru_lock);
+                locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
-                        cond_resched();
+                                                                locked, cc);
-                        spin_lock_irq(&zone->lru_lock);
+                if (!locked)
-                        if (fatal_signal_pending(current))
+                        break;
-                                break;
-                } else if (!locked)
-                        spin_lock_irq(&zone->lru_lock);
                /*
                 * migrate_pfn does not necessarily start aligned to a
@@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
                }
        }
-        acct_isolated(zone, cc);
+        acct_isolated(zone, locked, cc);
-        spin_unlock_irq(&zone->lru_lock);
+        if (locked)
+                spin_unlock_irqrestore(&zone->lru_lock, flags);
        trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
@@ -384,6 +431,20 @@ static bool suitable_migration_target(struct page *page)
 }
 /*
+ * Returns the start pfn of the last page block in a zone.  This is the starting
+ * point for full compaction of a zone.  Compaction searches for free pages from
+ * the end of each zone, while isolate_freepages_block scans forward inside each
+ * page block.
+ */
+static unsigned long start_free_pfn(struct zone *zone)
+{
+        unsigned long free_pfn;
+        free_pfn = zone->zone_start_pfn + zone->spanned_pages;
+        free_pfn &= ~(pageblock_nr_pages-1);
+        return free_pfn;
+}
+/*
 * Based on information in the current compact_control, find blocks
 * suitable for isolating free pages from and then isolate them.
 */
@@ -422,17 +483,6 @@ static void isolate_freepages(struct zone *zone,
                                        pfn -= pageblock_nr_pages) {
                unsigned long isolated;
-                /*
-                 * Skip ahead if another thread is compacting in the area
-                 * simultaneously. If we wrapped around, we can only skip
-                 * ahead if zone->compact_cached_free_pfn also wrapped to
-                 * above our starting point.
-                 */
-                if (cc->order > 0 && (!cc->wrapped ||
-                                      zone->compact_cached_free_pfn >
-                                      cc->start_free_pfn))
-                        pfn = min(pfn, zone->compact_cached_free_pfn);
                if (!pfn_valid(pfn))
                        continue;
@@ -458,7 +508,16 @@ static void isolate_freepages(struct zone *zone,
                 * are disabled
                 */
                isolated = 0;
-                spin_lock_irqsave(&zone->lock, flags);
+                /*
+                 * The zone lock must be held to isolate freepages.
+                 * Unfortunately this is a very coarse lock and can be
+                 * heavily contended if there are parallel allocations
+                 * or parallel compactions. For async compaction do not
+                 * spin on the lock
+                 */
+                if (!compact_trylock_irqsave(&zone->lock, &flags, cc))
+                        break;
                if (suitable_migration_target(page)) {
                        end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
                        isolated = isolate_freepages_block(pfn, end_pfn,
@@ -474,7 +533,15 @@ static void isolate_freepages(struct zone *zone,
                 */
                if (isolated) {
                        high_pfn = max(high_pfn, pfn);
-                        if (cc->order > 0)
+                        /*
+                         * If the free scanner has wrapped, update
+                         * compact_cached_free_pfn to point to the highest
+                         * pageblock with free pages. This reduces excessive
+                         * scanning of full pageblocks near the end of the
+                         * zone
+                         */
+                        if (cc->order > 0 && cc->wrapped)
                                zone->compact_cached_free_pfn = high_pfn;
                }
        }
@@ -484,6 +551,11 @@ static void isolate_freepages(struct zone *zone,
        cc->free_pfn = high_pfn;
        cc->nr_freepages = nr_freepages;
+        /* If compact_cached_free_pfn is reset then set it now */
+        if (cc->order > 0 && !cc->wrapped &&
+                        zone->compact_cached_free_pfn == start_free_pfn(zone))
+                zone->compact_cached_free_pfn = high_pfn;
 }
 /*
@@ -570,20 +642,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
        return ISOLATE_SUCCESS;
 }
-/*
- * Returns the start pfn of the last page block in a zone.  This is the starting
- * point for full compaction of a zone.  Compaction searches for free pages from
- * the end of each zone, while isolate_freepages_block scans forward inside each
- * page block.
- */
-static unsigned long start_free_pfn(struct zone *zone)
-{
-        unsigned long free_pfn;
-        free_pfn = zone->zone_start_pfn + zone->spanned_pages;
-        free_pfn &= ~(pageblock_nr_pages-1);
-        return free_pfn;
-}
 static int compact_finished(struct zone *zone,
                            struct compact_control *cc)
 {
@@ -771,7 +829,7 @@ out:
 static unsigned long compact_zone_order(struct zone *zone,
                                 int order, gfp_t gfp_mask,
-                                 bool sync)
+                                 bool sync, bool *contended)
 {
        struct compact_control cc = {
                .nr_freepages = 0,
@@ -780,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone,
                .migratetype = allocflags_to_migratetype(gfp_mask),
                .zone = zone,
                .sync = sync,
+                .contended = contended,
        };
        INIT_LIST_HEAD(&cc.freepages);
        INIT_LIST_HEAD(&cc.migratepages);
@@ -801,7 +860,7 @@ int sysctl_extfrag_threshold = 500;
 */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
                        int order, gfp_t gfp_mask, nodemask_t *nodemask,
-                        bool sync)
+                        bool sync, bool *contended)
 {
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        int may_enter_fs = gfp_mask & __GFP_FS;
@@ -825,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
                                                                nodemask) {
                int status;
-                status = compact_zone_order(zone, order, gfp_mask, sync);
+                status = compact_zone_order(zone, order, gfp_mask, sync,
+                                                contended);
                rc = max(status, rc);
                /* If a normal allocation would succeed, stop compacting */
@@ -861,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
                if (cc->order > 0) {
                        int ok = zone_watermark_ok(zone, cc->order,
                                                low_wmark_pages(zone), 0, 0);
-                        if (ok && cc->order > zone->compact_order_failed)
+                        if (ok && cc->order >= zone->compact_order_failed)
                                zone->compact_order_failed = cc->order + 1;
                        /* Currently async compaction is never deferred. */
                        else if (!ok && cc->sync)
diff --git a/mm/internal.h b/mm/internal.h
index 3314f79d775a..b8c91b342e24 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -130,6 +130,7 @@ struct compact_control {
        int order;                      /* order a direct compactor needs */
        int migratetype;                /* MOVABLE, RECLAIMABLE etc */
        struct zone *zone;
+        bool *contended;                /* True if a lock was contended */
 };
 unsigned long
diff --git a/mm/mmap.c b/mm/mmap.c
index e3e86914f11a..9adee9fc0d8a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2309,7 +2309,7 @@ void exit_mmap(struct mm_struct *mm)
        }
        vm_unacct_memory(nr_accounted);
-        BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
+        WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
 }
 /* Insert vm structure into process list sorted by address
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 009ac285fea7..c66fb875104a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1928,6 +1928,17 @@ this_zone_full:
                zlc_active = 0;
                goto zonelist_scan;
        }
+        if (page)
+                /*
+                 * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
+                 * necessary to allocate the page. The expectation is
+                 * that the caller is taking steps that will free more
+                 * memory. The caller should avoid the page being used
+                 * for !PFMEMALLOC purposes.
+                 */
+                page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
        return page;
 }
@@ -2091,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
        int migratetype, bool sync_migration,
-        bool *deferred_compaction,
+        bool *contended_compaction, bool *deferred_compaction,
        unsigned long *did_some_progress)
 {
        struct page *page;
@@ -2106,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        current->flags |= PF_MEMALLOC;
        *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-                                                nodemask, sync_migration);
+                                                nodemask, sync_migration,
+                                                contended_compaction);
        current->flags &= ~PF_MEMALLOC;
        if (*did_some_progress != COMPACT_SKIPPED) {
@@ -2152,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        struct zonelist *zonelist, enum zone_type high_zoneidx,
        nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
        int migratetype, bool sync_migration,
-        bool *deferred_compaction,
+        bool *contended_compaction, bool *deferred_compaction,
        unsigned long *did_some_progress)
 {
        return NULL;
@@ -2325,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
        unsigned long did_some_progress;
        bool sync_migration = false;
        bool deferred_compaction = false;
+        bool contended_compaction = false;
        /*
         * In the slowpath, we sanity check order to avoid ever trying to
@@ -2389,14 +2402,6 @@ rebalance:
                                zonelist, high_zoneidx, nodemask,
                                preferred_zone, migratetype);
                if (page) {
-                        /*
-                         * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
-                         * necessary to allocate the page. The expectation is
-                         * that the caller is taking steps that will free more
-                         * memory. The caller should avoid the page being used
-                         * for !PFMEMALLOC purposes.
-                         */
-                        page->pfmemalloc = true;
                        goto got_pg;
                }
        }
@@ -2422,6 +2427,7 @@ rebalance:
                                        nodemask,
                                        alloc_flags, preferred_zone,
                                        migratetype, sync_migration,
+                                        &contended_compaction,
                                        &deferred_compaction,
                                        &did_some_progress);
        if (page)
@@ -2431,10 +2437,11 @@ rebalance:
        /*
         * If compaction is deferred for high-order allocations, it is because
         * sync compaction recently failed. In this is the case and the caller
-         * has requested the system not be heavily disrupted, fail the
+         * requested a movable allocation that does not heavily disrupt the
-         * allocation now instead of entering direct reclaim
+         * system then fail the allocation instead of entering direct reclaim.
         */
-        if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
+        if ((deferred_compaction || contended_compaction) &&
+                                                (gfp_mask & __GFP_NO_KSWAPD))
                goto nopage;
        /* Try direct reclaim and then allocating */
@@ -2505,6 +2512,7 @@ rebalance:
                                        nodemask,
                                        alloc_flags, preferred_zone,
                                        migratetype, sync_migration,
+                                        &contended_compaction,
                                        &deferred_compaction,
                                        &did_some_progress);
                if (page)
@@ -2569,8 +2577,6 @@ retry_cpuset:
                page = __alloc_pages_slowpath(gfp_mask, order,
                                zonelist, high_zoneidx, nodemask,
                                preferred_zone, migratetype);
-        else
-                page->pfmemalloc = false;
        trace_mm_page_alloc(page, order, gfp_mask, migratetype);

diff --git a/mm/compaction.c b/mm/compaction.c index e78cb9688421..7fcd3a52e68d 100644 --- a/mm/compaction.c +++ b/mm/compaction.c
@@ -51,6 +51,47 @@ static inline bool migrate_async_suitable(int migratetype)
51	}	51	}
52		52
53	/*	53	/*
		54	* Compaction requires the taking of some coarse locks that are potentially
		55	* very heavily contended. Check if the process needs to be scheduled or
		56	* if the lock is contended. For async compaction, back out if the lock
		57	* is contended or a reschedule is needed. For sync compaction, schedule.
		58	*
		59	* Returns true if the lock is held.
		60	* Returns false if the lock is released and compaction should abort
		61	*/
		62	static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
		63	bool locked, struct compact_control *cc)
		64	{
		65	if (need_resched() || spin_is_contended(lock)) {
		66	if (locked) {
		67	spin_unlock_irqrestore(lock, *flags);
		68	locked = false;
		69	}
		70
		71	/* async aborts if taking too long or contended */
		72	if (!cc->sync) {
		73	if (cc->contended)
		74	*cc->contended = true;
		75	return false;
		76	}
		77
		78	cond_resched();
		79	if (fatal_signal_pending(current))
		80	return false;
		81	}
		82
		83	if (!locked)
		84	spin_lock_irqsave(lock, *flags);
		85	return true;
		86	}
		87
		88	static inline bool compact_trylock_irqsave(spinlock_t *lock,
		89	unsigned long *flags, struct compact_control *cc)
		90	{
		91	return compact_checklock_irqsave(lock, flags, false, cc);
		92	}
		93
		94	/*
54	* Isolate free pages onto a private freelist. Caller must hold zone->lock.	95	* Isolate free pages onto a private freelist. Caller must hold zone->lock.
55	* If @strict is true, will abort returning 0 on any invalid PFNs or non-free	96	* If @strict is true, will abort returning 0 on any invalid PFNs or non-free
56	* pages inside of the pageblock (even though it may still end up isolating	97	* pages inside of the pageblock (even though it may still end up isolating
@@ -173,7 +214,7 @@ isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn)
173	}	214	}
174		215
175	/* Update the number of anon and file isolated pages in the zone */	216	/* Update the number of anon and file isolated pages in the zone */
176	static void acct_isolated(struct zone *zone, struct compact_control *cc)	217	static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
177	{	218	{
178	struct page *page;	219	struct page *page;
179	unsigned int count[2] = { 0, };	220	unsigned int count[2] = { 0, };
@@ -181,8 +222,14 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
181	list_for_each_entry(page, &cc->migratepages, lru)	222	list_for_each_entry(page, &cc->migratepages, lru)
182	count[!!page_is_file_cache(page)]++;	223	count[!!page_is_file_cache(page)]++;
183		224
184	__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);	225	/* If locked we can use the interrupt unsafe versions */
185	__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);	226	if (locked) {
		227	__mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
		228	__mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
		229	} else {
		230	mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
		231	mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
		232	}
186	}	233	}
187		234
188	/* Similar to reclaim, but different enough that they don't share logic */	235	/* Similar to reclaim, but different enough that they don't share logic */
@@ -228,6 +275,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
228	struct list_head *migratelist = &cc->migratepages;	275	struct list_head *migratelist = &cc->migratepages;
229	isolate_mode_t mode = 0;	276	isolate_mode_t mode = 0;
230	struct lruvec *lruvec;	277	struct lruvec *lruvec;
		278	unsigned long flags;
		279	bool locked;
231		280
232	/*	281	/*
233	* Ensure that there are not too many pages isolated from the LRU	282	* Ensure that there are not too many pages isolated from the LRU
@@ -247,25 +296,22 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
247		296
248	/* Time to isolate some pages for migration */	297	/* Time to isolate some pages for migration */
249	cond_resched();	298	cond_resched();
250	spin_lock_irq(&zone->lru_lock);	299	spin_lock_irqsave(&zone->lru_lock, flags);
		300	locked = true;
251	for (; low_pfn < end_pfn; low_pfn++) {	301	for (; low_pfn < end_pfn; low_pfn++) {
252	struct page *page;	302	struct page *page;
253	bool locked = true;
254		303
255	/* give a chance to irqs before checking need_resched() */	304	/* give a chance to irqs before checking need_resched() */
256	if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {	305	if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
257	spin_unlock_irq(&zone->lru_lock);	306	spin_unlock_irqrestore(&zone->lru_lock, flags);
258	locked = false;	307	locked = false;
259	}	308	}
260	if (need_resched() || spin_is_contended(&zone->lru_lock)) {	309
261	if (locked)	310	/* Check if it is ok to still hold the lock */
262	spin_unlock_irq(&zone->lru_lock);	311	locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
263	cond_resched();	312	locked, cc);
264	spin_lock_irq(&zone->lru_lock);	313	if (!locked)
265	if (fatal_signal_pending(current))	314	break;
266	break;
267	} else if (!locked)
268	spin_lock_irq(&zone->lru_lock);
269		315
270	/*	316	/*
271	* migrate_pfn does not necessarily start aligned to a	317	* migrate_pfn does not necessarily start aligned to a
@@ -349,9 +395,10 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
349	}	395	}
350	}	396	}
351		397
352	acct_isolated(zone, cc);	398	acct_isolated(zone, locked, cc);
353		399
354	spin_unlock_irq(&zone->lru_lock);	400	if (locked)
		401	spin_unlock_irqrestore(&zone->lru_lock, flags);
355		402
356	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);	403	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
357		404
@@ -384,6 +431,20 @@ static bool suitable_migration_target(struct page *page)
384	}	431	}
385		432
386	/*	433	/*
		434	* Returns the start pfn of the last page block in a zone. This is the starting
		435	* point for full compaction of a zone. Compaction searches for free pages from
		436	* the end of each zone, while isolate_freepages_block scans forward inside each
		437	* page block.
		438	*/
		439	static unsigned long start_free_pfn(struct zone *zone)
		440	{
		441	unsigned long free_pfn;
		442	free_pfn = zone->zone_start_pfn + zone->spanned_pages;
		443	free_pfn &= ~(pageblock_nr_pages-1);
		444	return free_pfn;
		445	}
		446
		447	/*
387	* Based on information in the current compact_control, find blocks	448	* Based on information in the current compact_control, find blocks
388	* suitable for isolating free pages from and then isolate them.	449	* suitable for isolating free pages from and then isolate them.
389	*/	450	*/
@@ -422,17 +483,6 @@ static void isolate_freepages(struct zone *zone,
422	pfn -= pageblock_nr_pages) {	483	pfn -= pageblock_nr_pages) {
423	unsigned long isolated;	484	unsigned long isolated;
424		485
425	/*
426	* Skip ahead if another thread is compacting in the area
427	* simultaneously. If we wrapped around, we can only skip
428	* ahead if zone->compact_cached_free_pfn also wrapped to
429	* above our starting point.
430	*/
431	if (cc->order > 0 && (!cc->wrapped ||
432	zone->compact_cached_free_pfn >
433	cc->start_free_pfn))
434	pfn = min(pfn, zone->compact_cached_free_pfn);
435
436	if (!pfn_valid(pfn))	486	if (!pfn_valid(pfn))
437	continue;	487	continue;
438		488
@@ -458,7 +508,16 @@ static void isolate_freepages(struct zone *zone,
458	* are disabled	508	* are disabled
459	*/	509	*/
460	isolated = 0;	510	isolated = 0;
461	spin_lock_irqsave(&zone->lock, flags);	511
		512	/*
		513	* The zone lock must be held to isolate freepages.
		514	* Unfortunately this is a very coarse lock and can be
		515	* heavily contended if there are parallel allocations
		516	* or parallel compactions. For async compaction do not
		517	* spin on the lock
		518	*/
		519	if (!compact_trylock_irqsave(&zone->lock, &flags, cc))
		520	break;
462	if (suitable_migration_target(page)) {	521	if (suitable_migration_target(page)) {
463	end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);	522	end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
464	isolated = isolate_freepages_block(pfn, end_pfn,	523	isolated = isolate_freepages_block(pfn, end_pfn,
@@ -474,7 +533,15 @@ static void isolate_freepages(struct zone *zone,
474	*/	533	*/
475	if (isolated) {	534	if (isolated) {
476	high_pfn = max(high_pfn, pfn);	535	high_pfn = max(high_pfn, pfn);
477	if (cc->order > 0)	536
		537	/*
		538	* If the free scanner has wrapped, update
		539	* compact_cached_free_pfn to point to the highest
		540	* pageblock with free pages. This reduces excessive
		541	* scanning of full pageblocks near the end of the
		542	* zone
		543	*/
		544	if (cc->order > 0 && cc->wrapped)
478	zone->compact_cached_free_pfn = high_pfn;	545	zone->compact_cached_free_pfn = high_pfn;
479	}	546	}
480	}	547	}
@@ -484,6 +551,11 @@ static void isolate_freepages(struct zone *zone,
484		551
485	cc->free_pfn = high_pfn;	552	cc->free_pfn = high_pfn;
486	cc->nr_freepages = nr_freepages;	553	cc->nr_freepages = nr_freepages;
		554
		555	/* If compact_cached_free_pfn is reset then set it now */
		556	if (cc->order > 0 && !cc->wrapped &&
		557	zone->compact_cached_free_pfn == start_free_pfn(zone))
		558	zone->compact_cached_free_pfn = high_pfn;
487	}	559	}
488		560
489	/*	561	/*
@@ -570,20 +642,6 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
570	return ISOLATE_SUCCESS;	642	return ISOLATE_SUCCESS;
571	}	643	}
572		644
573	/*
574	* Returns the start pfn of the last page block in a zone. This is the starting
575	* point for full compaction of a zone. Compaction searches for free pages from
576	* the end of each zone, while isolate_freepages_block scans forward inside each
577	* page block.
578	*/
579	static unsigned long start_free_pfn(struct zone *zone)
580	{
581	unsigned long free_pfn;
582	free_pfn = zone->zone_start_pfn + zone->spanned_pages;
583	free_pfn &= ~(pageblock_nr_pages-1);
584	return free_pfn;
585	}
586
587	static int compact_finished(struct zone *zone,	645	static int compact_finished(struct zone *zone,
588	struct compact_control *cc)	646	struct compact_control *cc)
589	{	647	{
@@ -771,7 +829,7 @@ out:
771		829
772	static unsigned long compact_zone_order(struct zone *zone,	830	static unsigned long compact_zone_order(struct zone *zone,
773	int order, gfp_t gfp_mask,	831	int order, gfp_t gfp_mask,
774	bool sync)	832	bool sync, bool *contended)
775	{	833	{
776	struct compact_control cc = {	834	struct compact_control cc = {
777	.nr_freepages = 0,	835	.nr_freepages = 0,
@@ -780,6 +838,7 @@ static unsigned long compact_zone_order(struct zone *zone,
780	.migratetype = allocflags_to_migratetype(gfp_mask),	838	.migratetype = allocflags_to_migratetype(gfp_mask),
781	.zone = zone,	839	.zone = zone,
782	.sync = sync,	840	.sync = sync,
		841	.contended = contended,
783	};	842	};
784	INIT_LIST_HEAD(&cc.freepages);	843	INIT_LIST_HEAD(&cc.freepages);
785	INIT_LIST_HEAD(&cc.migratepages);	844	INIT_LIST_HEAD(&cc.migratepages);
@@ -801,7 +860,7 @@ int sysctl_extfrag_threshold = 500;
801	*/	860	*/
802	unsigned long try_to_compact_pages(struct zonelist *zonelist,	861	unsigned long try_to_compact_pages(struct zonelist *zonelist,
803	int order, gfp_t gfp_mask, nodemask_t *nodemask,	862	int order, gfp_t gfp_mask, nodemask_t *nodemask,
804	bool sync)	863	bool sync, bool *contended)
805	{	864	{
806	enum zone_type high_zoneidx = gfp_zone(gfp_mask);	865	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
807	int may_enter_fs = gfp_mask & __GFP_FS;	866	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -825,7 +884,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
825	nodemask) {	884	nodemask) {
826	int status;	885	int status;
827		886
828	status = compact_zone_order(zone, order, gfp_mask, sync);	887	status = compact_zone_order(zone, order, gfp_mask, sync,
		888	contended);
829	rc = max(status, rc);	889	rc = max(status, rc);
830		890
831	/* If a normal allocation would succeed, stop compacting */	891	/* If a normal allocation would succeed, stop compacting */
@@ -861,7 +921,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
861	if (cc->order > 0) {	921	if (cc->order > 0) {
862	int ok = zone_watermark_ok(zone, cc->order,	922	int ok = zone_watermark_ok(zone, cc->order,
863	low_wmark_pages(zone), 0, 0);	923	low_wmark_pages(zone), 0, 0);
864	if (ok && cc->order > zone->compact_order_failed)	924	if (ok && cc->order >= zone->compact_order_failed)
865	zone->compact_order_failed = cc->order + 1;	925	zone->compact_order_failed = cc->order + 1;
866	/* Currently async compaction is never deferred. */	926	/* Currently async compaction is never deferred. */
867	else if (!ok && cc->sync)	927	else if (!ok && cc->sync)


diff --git a/mm/internal.h b/mm/internal.h index 3314f79d775a..b8c91b342e24 100644 --- a/mm/internal.h +++ b/mm/internal.h
@@ -130,6 +130,7 @@ struct compact_control {
130	int order; /* order a direct compactor needs */	130	int order; /* order a direct compactor needs */
131	int migratetype; /* MOVABLE, RECLAIMABLE etc */	131	int migratetype; /* MOVABLE, RECLAIMABLE etc */
132	struct zone *zone;	132	struct zone *zone;
		133	bool *contended; /* True if a lock was contended */
133	};	134	};
134		135
135	unsigned long	136	unsigned long


diff --git a/mm/mmap.c b/mm/mmap.c index e3e86914f11a..9adee9fc0d8a 100644 --- a/mm/mmap.c +++ b/mm/mmap.c
@@ -2309,7 +2309,7 @@ void exit_mmap(struct mm_struct *mm)
2309	}	2309	}
2310	vm_unacct_memory(nr_accounted);	2310	vm_unacct_memory(nr_accounted);
2311		2311
2312	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);	2312	WARN_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
2313	}	2313	}
2314		2314
2315	/* Insert vm structure into process list sorted by address	2315	/* Insert vm structure into process list sorted by address


diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 009ac285fea7..c66fb875104a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1928,6 +1928,17 @@ this_zone_full:
1928	zlc_active = 0;	1928	zlc_active = 0;
1929	goto zonelist_scan;	1929	goto zonelist_scan;
1930	}	1930	}
		1931
		1932	if (page)
		1933	/*
		1934	* page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
		1935	* necessary to allocate the page. The expectation is
		1936	* that the caller is taking steps that will free more
		1937	* memory. The caller should avoid the page being used
		1938	* for !PFMEMALLOC purposes.
		1939	*/
		1940	page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS);
		1941
1931	return page;	1942	return page;
1932	}	1943	}
1933		1944
@@ -2091,7 +2102,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
2091	struct zonelist *zonelist, enum zone_type high_zoneidx,	2102	struct zonelist *zonelist, enum zone_type high_zoneidx,
2092	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,	2103	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2093	int migratetype, bool sync_migration,	2104	int migratetype, bool sync_migration,
2094	bool *deferred_compaction,	2105	bool *contended_compaction, bool *deferred_compaction,
2095	unsigned long *did_some_progress)	2106	unsigned long *did_some_progress)
2096	{	2107	{
2097	struct page *page;	2108	struct page *page;
@@ -2106,7 +2117,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
2106		2117
2107	current->flags |= PF_MEMALLOC;	2118	current->flags |= PF_MEMALLOC;
2108	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,	2119	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
2109	nodemask, sync_migration);	2120	nodemask, sync_migration,
		2121	contended_compaction);
2110	current->flags &= ~PF_MEMALLOC;	2122	current->flags &= ~PF_MEMALLOC;
2111	if (*did_some_progress != COMPACT_SKIPPED) {	2123	if (*did_some_progress != COMPACT_SKIPPED) {
2112		2124
@@ -2152,7 +2164,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
2152	struct zonelist *zonelist, enum zone_type high_zoneidx,	2164	struct zonelist *zonelist, enum zone_type high_zoneidx,
2153	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,	2165	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2154	int migratetype, bool sync_migration,	2166	int migratetype, bool sync_migration,
2155	bool *deferred_compaction,	2167	bool *contended_compaction, bool *deferred_compaction,
2156	unsigned long *did_some_progress)	2168	unsigned long *did_some_progress)
2157	{	2169	{
2158	return NULL;	2170	return NULL;
@@ -2325,6 +2337,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
2325	unsigned long did_some_progress;	2337	unsigned long did_some_progress;
2326	bool sync_migration = false;	2338	bool sync_migration = false;
2327	bool deferred_compaction = false;	2339	bool deferred_compaction = false;
		2340	bool contended_compaction = false;
2328		2341
2329	/*	2342	/*
2330	* In the slowpath, we sanity check order to avoid ever trying to	2343	* In the slowpath, we sanity check order to avoid ever trying to
@@ -2389,14 +2402,6 @@ rebalance:
2389	zonelist, high_zoneidx, nodemask,	2402	zonelist, high_zoneidx, nodemask,
2390	preferred_zone, migratetype);	2403	preferred_zone, migratetype);
2391	if (page) {	2404	if (page) {
2392	/*
2393	* page->pfmemalloc is set when ALLOC_NO_WATERMARKS was
2394	* necessary to allocate the page. The expectation is
2395	* that the caller is taking steps that will free more
2396	* memory. The caller should avoid the page being used
2397	* for !PFMEMALLOC purposes.
2398	*/
2399	page->pfmemalloc = true;
2400	goto got_pg;	2405	goto got_pg;
2401	}	2406	}
2402	}	2407	}
@@ -2422,6 +2427,7 @@ rebalance:
2422	nodemask,	2427	nodemask,
2423	alloc_flags, preferred_zone,	2428	alloc_flags, preferred_zone,
2424	migratetype, sync_migration,	2429	migratetype, sync_migration,
		2430	&contended_compaction,
2425	&deferred_compaction,	2431	&deferred_compaction,
2426	&did_some_progress);	2432	&did_some_progress);
2427	if (page)	2433	if (page)
@@ -2431,10 +2437,11 @@ rebalance:
2431	/*	2437	/*
2432	* If compaction is deferred for high-order allocations, it is because	2438	* If compaction is deferred for high-order allocations, it is because
2433	* sync compaction recently failed. In this is the case and the caller	2439	* sync compaction recently failed. In this is the case and the caller
2434	* has requested the system not be heavily disrupted, fail the	2440	* requested a movable allocation that does not heavily disrupt the
2435	* allocation now instead of entering direct reclaim	2441	* system then fail the allocation instead of entering direct reclaim.
2436	*/	2442	*/
2437	if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))	2443	if ((deferred_compaction || contended_compaction) &&
		2444	(gfp_mask & __GFP_NO_KSWAPD))
2438	goto nopage;	2445	goto nopage;
2439		2446
2440	/* Try direct reclaim and then allocating */	2447	/* Try direct reclaim and then allocating */
@@ -2505,6 +2512,7 @@ rebalance:
2505	nodemask,	2512	nodemask,
2506	alloc_flags, preferred_zone,	2513	alloc_flags, preferred_zone,
2507	migratetype, sync_migration,	2514	migratetype, sync_migration,
		2515	&contended_compaction,
2508	&deferred_compaction,	2516	&deferred_compaction,
2509	&did_some_progress);	2517	&did_some_progress);
2510	if (page)	2518	if (page)
@@ -2569,8 +2577,6 @@ retry_cpuset:
2569	page = __alloc_pages_slowpath(gfp_mask, order,	2577	page = __alloc_pages_slowpath(gfp_mask, order,
2570	zonelist, high_zoneidx, nodemask,	2578	zonelist, high_zoneidx, nodemask,
2571	preferred_zone, migratetype);	2579	preferred_zone, migratetype);
2572	else
2573	page->pfmemalloc = false;
2574		2580
2575	trace_mm_page_alloc(page, order, gfp_mask, migratetype);	2581	trace_mm_page_alloc(page, order, gfp_mask, migratetype);
2576		2582