aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David Rientjes <rientjes@google.com> 2014-07-30 19:08:24 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-07-30 20:16:13 -0400
commit: b104a35d32025ca740539db2808aa3385d0f30eb (patch)
tree: 60e0ba241083b6fea6d85a7181225e918c9f7ff5
parent: f6789593d5cea42a4ecb1cbeab6a23ade5ebbba7 (diff)
mm, thp: do not allow thp faults to avoid cpuset restrictions
The page allocator relies on __GFP_WAIT to determine if ALLOC_CPUSET should be set in allocflags. ALLOC_CPUSET controls if a page allocation should be restricted only to the set of allowed cpuset mems.

Transparent hugepages clear __GFP_WAIT when defrag is disabled to prevent the fault path from using memory compaction or direct reclaim. Thus, they are unfairly able to allocate outside of their cpuset mems restriction as a side-effect.

This patch ensures that ALLOC_CPUSET is only cleared when the gfp mask is truly GFP_ATOMIC by verifying it is also not a thp allocation.

Signed-off-by: David Rientjes <rientjes@google.com>
Reported-by: Alex Thorlton <athorlton@sgi.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Bob Liu <lliubbo@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hedi Berriche <hedi@sgi.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/page_alloc.c 16
1 files changed, 8 insertions, 8 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8bcfe3ae20cb..ef44ad736ca1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2447,7 +2447,7 @@ static inline int
2447gfp_to_alloc_flags(gfp_t gfp_mask) 2447gfp_to_alloc_flags(gfp_t gfp_mask)
2448{ 2448{
2449 int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET; 2449 int alloc_flags = ALLOC_WMARK_MIN | ALLOC_CPUSET;
2450 const gfp_t wait = gfp_mask & __GFP_WAIT; 2450 const bool atomic = !(gfp_mask & (__GFP_WAIT | __GFP_NO_KSWAPD));
2451 2451
2452 /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */ 2452 /* __GFP_HIGH is assumed to be the same as ALLOC_HIGH to save a branch. */
2453 BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH); 2453 BUILD_BUG_ON(__GFP_HIGH != (__force gfp_t) ALLOC_HIGH);
@@ -2456,20 +2456,20 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
2456 * The caller may dip into page reserves a bit more if the caller 2456 * The caller may dip into page reserves a bit more if the caller
2457 * cannot run direct reclaim, or if the caller has realtime scheduling 2457 * cannot run direct reclaim, or if the caller has realtime scheduling
2458 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will 2458 * policy or is asking for __GFP_HIGH memory. GFP_ATOMIC requests will
2459 * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH). 2459 * set both ALLOC_HARDER (atomic == true) and ALLOC_HIGH (__GFP_HIGH).
2460 */ 2460 */
2461 alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH); 2461 alloc_flags |= (__force int) (gfp_mask & __GFP_HIGH);
2462 2462
2463 if (!wait) { 2463 if (atomic) {
2464 /* 2464 /*
2465 * Not worth trying to allocate harder for 2465 * Not worth trying to allocate harder for __GFP_NOMEMALLOC even
2466 * __GFP_NOMEMALLOC even if it can't schedule. 2466 * if it can't schedule.
2467 */ 2467 */
2468 if (!(gfp_mask & __GFP_NOMEMALLOC)) 2468 if (!(gfp_mask & __GFP_NOMEMALLOC))
2469 alloc_flags |= ALLOC_HARDER; 2469 alloc_flags |= ALLOC_HARDER;
2470 /* 2470 /*
2471 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. 2471 * Ignore cpuset mems for GFP_ATOMIC rather than fail, see the
2472 * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 2472 * comment for __cpuset_node_allowed_softwall().
2473 */ 2473 */
2474 alloc_flags &= ~ALLOC_CPUSET; 2474 alloc_flags &= ~ALLOC_CPUSET;
2475 } else if (unlikely(rt_task(current)) && !in_interrupt()) 2475 } else if (unlikely(rt_task(current)) && !in_interrupt())