author    Vlastimil Babka <vbabka@suse.cz>  2017-01-24 18:18:38 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-01-24 19:26:14 -0500
commit    5ce9bfef1d27944c119a397a9d827bef795487ce (patch)
tree      ac3dab0f4c7c9807fc01ca208fc5735011308621 /mm/page_alloc.c
parent    16096c25bf0ca5d87e4fa6ec6108ba53feead212 (diff)
mm, page_alloc: move cpuset seqcount checking to slowpath
This is a preparation for the following patch to make review simpler. While
the primary motivation is a bug fix, this also simplifies the fast path,
although the moved code is only enabled when cpusets are in use.

Link: http://lkml.kernel.org/r/20170120103843.24587-4-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Ganapatrao Kulkarni <gpkulkarni@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
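The code being moved is the standard seqcount-style "take a cookie, check it only
when about to fail" pattern around the task's mems_allowed. The following is a
condensed, compilable analogue of the control flow this patch creates in
__alloc_pages_slowpath(): the cpuset cookie is taken once per pass of the retry
loop, and the check happens only on the failure path. It is a sketch, not kernel
code; the *_stub, attempt_allocation and slowpath_analogue names are invented for
illustration (the real helpers are read_mems_allowed_begin()/read_mems_allowed_retry()).

/*
 * Condensed analogue of the slowpath retry loop after this patch.
 * All *_stub and *_analogue names are illustrative, not kernel symbols.
 */
#include <stdbool.h>
#include <stddef.h>

static unsigned int read_mems_allowed_begin_stub(void)
{
	return 0;			/* stand-in for the seqcount snapshot */
}

static bool read_mems_allowed_retry_stub(unsigned int cookie)
{
	(void)cookie;
	return false;			/* pretend mems_allowed never changed */
}

static void *attempt_allocation(int *no_progress_loops, int *compaction_retries)
{
	/* Stand-in for the reclaim/compaction attempts that bump these counters. */
	(*no_progress_loops)++;
	(*compaction_retries)++;
	return NULL;			/* pretend every attempt fails */
}

static void *slowpath_analogue(void)
{
	void *page;
	unsigned int cpuset_mems_cookie;
	int no_progress_loops, compaction_retries;

retry_cpuset:
	/* Per-pass retry state is (re)initialised here, so a cpuset-triggered
	 * retry starts with a clean slate rather than inherited counters. */
	no_progress_loops = 0;
	compaction_retries = 0;
	cpuset_mems_cookie = read_mems_allowed_begin_stub();

	page = attempt_allocation(&no_progress_loops, &compaction_retries);
	if (page)
		goto got_pg;

	/* nopage: only on the failure path do we ask whether mems_allowed
	 * changed underneath us; if it did, restart from retry_cpuset. */
	if (read_mems_allowed_retry_stub(cpuset_mems_cookie))
		goto retry_cpuset;

got_pg:
	return page;
}

int main(void)
{
	return slowpath_analogue() ? 0 : 1;
}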
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  47
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6f28b7e926d1..0df3c089d3af 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	unsigned int alloc_flags;
 	unsigned long did_some_progress;
-	enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+	enum compact_priority compact_priority;
 	enum compact_result compact_result;
-	int compaction_retries = 0;
-	int no_progress_loops = 0;
+	int compaction_retries;
+	int no_progress_loops;
 	unsigned long alloc_start = jiffies;
 	unsigned int stall_timeout = 10 * HZ;
+	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3549,6 +3550,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
 		gfp_mask &= ~__GFP_ATOMIC;
 
+retry_cpuset:
+	compaction_retries = 0;
+	no_progress_loops = 0;
+	compact_priority = DEF_COMPACT_PRIORITY;
+	cpuset_mems_cookie = read_mems_allowed_begin();
+
 	/*
 	 * The fast path uses conservative alloc_flags to succeed only until
 	 * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3720,6 +3727,15 @@ retry:
 	}
 
 nopage:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (read_mems_allowed_retry(cpuset_mems_cookie))
+		goto retry_cpuset;
+
 	warn_alloc(gfp_mask,
 			"page allocation failure: order:%u", order);
 got_pg:
@@ -3734,7 +3750,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	struct page *page;
-	unsigned int cpuset_mems_cookie;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = {
@@ -3771,9 +3786,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
-
 	/* Dirty zone balancing only done in the fast path */
 	ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
@@ -3786,6 +3798,11 @@ retry_cpuset:
 					ac.high_zoneidx, ac.nodemask);
 	if (!ac.preferred_zoneref->zone) {
 		page = NULL;
+		/*
+		 * This might be due to race with cpuset_current_mems_allowed
+		 * update, so make sure we retry with original nodemask in the
+		 * slow path.
+		 */
 		goto no_zone;
 	}
 
@@ -3794,6 +3811,7 @@ retry_cpuset:
 	if (likely(page))
 		goto out;
 
+no_zone:
 	/*
 	 * Runtime PM, block IO and its error handling path can deadlock
 	 * because I/O on the device might not complete.
@@ -3811,24 +3829,11 @@ retry_cpuset:
 		ac.nodemask = nodemask;
 		ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
 						ac.high_zoneidx, ac.nodemask);
-		if (!ac.preferred_zoneref->zone)
-			goto no_zone;
+		/* If we have NULL preferred zone, slowpath wll handle that */
 	}
 
 	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
-no_zone:
-	/*
-	 * When updating a task's mems_allowed, it is possible to race with
-	 * parallel threads in such a way that an allocation can fail while
-	 * the mask is being updated. If a page allocation is about to fail,
-	 * check if the cpuset changed during allocation and if so, retry.
-	 */
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-		alloc_mask = gfp_mask;
-		goto retry_cpuset;
-	}
-
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
 	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
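The comment carried by both the removed and the added code describes a race between
a task's mems_allowed being rewritten and a parallel allocation. Below is a hedged
two-thread sketch of that race and the cookie-based retry in ordinary POSIX C; the
names mems_seq, mask_read_begin, mask_read_retry and mask_updater are invented for
the demonstration and this is an analogy, not the kernel's seqcount implementation.

/*
 * Two-thread illustration: one thread updates the allowed mask under an
 * odd/even sequence while another thread's "allocation" fails, and the
 * cookie check tells it to retry instead of failing spuriously.
 * Build with: cc -pthread race_demo.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static _Atomic unsigned int mems_seq;	/* even = stable, odd = update in flight */
static _Atomic int allowed_node;	/* starts at node 0 */

static unsigned int mask_read_begin(void)
{
	unsigned int seq;

	while ((seq = atomic_load(&mems_seq)) & 1)
		;			/* wait out an in-flight update */
	return seq;
}

static bool mask_read_retry(unsigned int cookie)
{
	return atomic_load(&mems_seq) != cookie;	/* changed => retry */
}

static void *mask_updater(void *arg)
{
	(void)arg;
	usleep(1000);			/* let the allocator start first */
	atomic_fetch_add(&mems_seq, 1);	/* begin update (odd) */
	atomic_store(&allowed_node, 1);	/* migrate the task to node 1 */
	atomic_fetch_add(&mems_seq, 1);	/* end update (even) */
	return NULL;
}

int main(void)
{
	pthread_t updater;
	unsigned int cookie;
	int attempts = 0;
	bool ok;

	pthread_create(&updater, NULL, mask_updater, NULL);

	do {
		cookie = mask_read_begin();
		attempts++;
		/* Pretend the allocation can only succeed from node 1. */
		ok = (atomic_load(&allowed_node) == 1);
		usleep(2000);		/* widen the race window (demo only) */
	} while (!ok && mask_read_retry(cookie));

	pthread_join(updater, NULL);
	printf("allocation %s after %d attempt(s)\n",
	       ok ? "succeeded" : "failed", attempts);
	return ok ? 0 : 1;
}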