author     Vlastimil Babka <vbabka@suse.cz>  2017-01-24 18:18:38 -0500
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2017-02-01 02:33:04 -0500
commit     b678e4ff7ce0d01bb14f0adb92c1786b0a341cca (patch)
tree       717f56c7e8fa28f9947053852b8e2dd5765fa47d /mm
parent     d1656c5aef4d72f03a7833d07a378c8f604b8307 (diff)
mm, page_alloc: move cpuset seqcount checking to slowpath
commit 5ce9bfef1d27944c119a397a9d827bef795487ce upstream.

This is a preparation for the following patch to make review simpler. While
the primary motivation is a bug fix, this also simplifies the fast path,
although the moved code is only enabled when cpusets are in use.

Link: http://lkml.kernel.org/r/20170120103843.24587-4-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Ganapatrao Kulkarni <gpkulkarni@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
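For context, the patch leans on the cookie returned by read_mems_allowed_begin()
and later checked by read_mems_allowed_retry(): the allocation is retried only
when the cpuset's mems_allowed changed while the allocator was running. The
snippet below is a minimal userspace sketch of that retry pattern, modelling
the kernel's seqcount with a plain counter; mask_begin(), mask_retry() and
try_alloc() are made-up stand-ins for illustration, not kernel APIs.

/*
 * Illustrative sketch only, not kernel code: the cookie/retry pattern used
 * by read_mems_allowed_begin()/read_mems_allowed_retry(), modelled with a
 * plain sequence counter.
 */
#include <stdio.h>

static unsigned int mems_seq;               /* bumped whenever the mask changes */

static unsigned int mask_begin(void)        /* ~ read_mems_allowed_begin() */
{
        return mems_seq;
}

static int mask_retry(unsigned int cookie)  /* ~ read_mems_allowed_retry() */
{
        return mems_seq != cookie;          /* mask changed while we allocated */
}

static void *try_alloc(void)                /* stands in for the zonelist walk */
{
        return NULL;                        /* pretend the allocation failed */
}

int main(void)
{
        void *page;
        unsigned int cookie;

retry_cpuset:
        cookie = mask_begin();
        page = try_alloc();

        /*
         * Only when the allocation is about to fail do we check whether the
         * mask changed underneath us; if so, retry from the top. This mirrors
         * the check that the patch moves into the slowpath.
         */
        if (!page && mask_retry(cookie))
                goto retry_cpuset;

        printf("allocation %s\n", page ? "succeeded" : "failed");
        return 0;
}

Moving the cookie read and the retry check into __alloc_pages_slowpath(), as the
diff below does, keeps this bookkeeping out of the fast path while preserving
the retry-on-race behaviour.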
Diffstat (limited to 'mm')
-rw-r--r--   mm/page_alloc.c   47
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dedadb4a779f..3e04bb398dc6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3502,12 +3502,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         struct page *page = NULL;
         unsigned int alloc_flags;
         unsigned long did_some_progress;
-        enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+        enum compact_priority compact_priority;
         enum compact_result compact_result;
-        int compaction_retries = 0;
-        int no_progress_loops = 0;
+        int compaction_retries;
+        int no_progress_loops;
         unsigned long alloc_start = jiffies;
         unsigned int stall_timeout = 10 * HZ;
+        unsigned int cpuset_mems_cookie;
 
         /*
          * In the slowpath, we sanity check order to avoid ever trying to
@@ -3528,6 +3529,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                                 (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
                 gfp_mask &= ~__GFP_ATOMIC;
 
+retry_cpuset:
+        compaction_retries = 0;
+        no_progress_loops = 0;
+        compact_priority = DEF_COMPACT_PRIORITY;
+        cpuset_mems_cookie = read_mems_allowed_begin();
+
         /*
          * The fast path uses conservative alloc_flags to succeed only until
          * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3699,6 +3706,15 @@ retry:
         }
 
 nopage:
+        /*
+         * When updating a task's mems_allowed, it is possible to race with
+         * parallel threads in such a way that an allocation can fail while
+         * the mask is being updated. If a page allocation is about to fail,
+         * check if the cpuset changed during allocation and if so, retry.
+         */
+        if (read_mems_allowed_retry(cpuset_mems_cookie))
+                goto retry_cpuset;
+
         warn_alloc(gfp_mask,
                         "page allocation failure: order:%u", order);
 got_pg:
@@ -3713,7 +3729,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                         struct zonelist *zonelist, nodemask_t *nodemask)
 {
         struct page *page;
-        unsigned int cpuset_mems_cookie;
         unsigned int alloc_flags = ALLOC_WMARK_LOW;
         gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
         struct alloc_context ac = {
@@ -3750,9 +3765,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
         if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
                 alloc_flags |= ALLOC_CMA;
 
-retry_cpuset:
-        cpuset_mems_cookie = read_mems_allowed_begin();
-
         /* Dirty zone balancing only done in the fast path */
         ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
@@ -3765,6 +3777,11 @@ retry_cpuset:
                                         ac.high_zoneidx, ac.nodemask);
         if (!ac.preferred_zoneref->zone) {
                 page = NULL;
+                /*
+                 * This might be due to race with cpuset_current_mems_allowed
+                 * update, so make sure we retry with original nodemask in the
+                 * slow path.
+                 */
                 goto no_zone;
         }
 
@@ -3773,6 +3790,7 @@ retry_cpuset:
         if (likely(page))
                 goto out;
 
+no_zone:
         /*
          * Runtime PM, block IO and its error handling path can deadlock
          * because I/O on the device might not complete.
@@ -3790,24 +3808,11 @@ retry_cpuset:
                 ac.nodemask = nodemask;
                 ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
                                                 ac.high_zoneidx, ac.nodemask);
-                if (!ac.preferred_zoneref->zone)
-                        goto no_zone;
+                /* If we have NULL preferred zone, slowpath wll handle that */
         }
 
         page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
-no_zone:
-        /*
-         * When updating a task's mems_allowed, it is possible to race with
-         * parallel threads in such a way that an allocation can fail while
-         * the mask is being updated. If a page allocation is about to fail,
-         * check if the cpuset changed during allocation and if so, retry.
-         */
-        if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-                alloc_mask = gfp_mask;
-                goto retry_cpuset;
-        }
-
 out:
         if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
             unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {