author     Vlastimil Babka <vbabka@suse.cz>                  2017-01-24 18:18:38 -0500
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>   2017-02-01 02:33:04 -0500
commit     b678e4ff7ce0d01bb14f0adb92c1786b0a341cca (patch)
tree       717f56c7e8fa28f9947053852b8e2dd5765fa47d /mm
parent     d1656c5aef4d72f03a7833d07a378c8f604b8307 (diff)
mm, page_alloc: move cpuset seqcount checking to slowpath
commit 5ce9bfef1d27944c119a397a9d827bef795487ce upstream.
This is a preparation for the following patch to make review simpler.
While the primary motivation is a bug fix, this also simplifies the fast
path, although the moved code is only enabled when cpusets are in use.
Link: http://lkml.kernel.org/r/20170120103843.24587-4-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Ganapatrao Kulkarni <gpkulkarni@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
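The retry this patch moves relies on the cpuset mems_allowed seqcount pattern: read_mems_allowed_begin() samples a sequence counter before the allocation attempt, and read_mems_allowed_retry() checks at the failure point whether a concurrent cpuset update could have caused the failure, in which case the slowpath jumps back to retry_cpuset with its per-attempt counters reset. Below is a minimal userspace C sketch of that begin/retry shape, for orientation only: the seq_begin()/seq_retry() helpers, the plain C11 atomic counter, and the try_alloc() stub are illustrative assumptions, not the kernel's seqcount_t or page allocator API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/*
 * Toy analogue of the mems_allowed seqcount: a writer would bump this
 * counter around a nodemask update; readers sample it before an
 * allocation attempt and re-check it when the attempt is about to fail.
 */
static atomic_uint mems_seq;

static unsigned int seq_begin(void)
{
	return atomic_load_explicit(&mems_seq, memory_order_acquire);
}

static bool seq_retry(unsigned int cookie)
{
	/* True if an update ran (or may have run) since seq_begin(). */
	return atomic_load_explicit(&mems_seq, memory_order_acquire) != cookie;
}

/* Stand-in for an allocation attempt; always fails so the retry check runs. */
static void *try_alloc(void)
{
	return NULL;
}

int main(void)
{
	void *page;
	unsigned int cookie;
	int attempts = 0;

retry_cpuset:
	/*
	 * The patch resets the per-attempt state (compaction_retries,
	 * no_progress_loops, compact_priority) at this point and only then
	 * samples the cookie; this sketch just counts attempts.
	 */
	attempts++;
	cookie = seq_begin();

	page = try_alloc();
	if (!page && seq_retry(cookie) && attempts < 3)
		goto retry_cpuset;

	printf("attempts=%d page=%p\n", attempts, page);
	return 0;
}

The property the patch preserves is that the cookie is sampled before any per-attempt state is consumed, so a racing mems_allowed update can only force a retry, never a spurious failure.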
Diffstat (limited to 'mm')
-rw-r--r--   mm/page_alloc.c   47
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dedadb4a779f..3e04bb398dc6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3502,12 +3502,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	unsigned int alloc_flags;
 	unsigned long did_some_progress;
-	enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+	enum compact_priority compact_priority;
 	enum compact_result compact_result;
-	int compaction_retries = 0;
-	int no_progress_loops = 0;
+	int compaction_retries;
+	int no_progress_loops;
 	unsigned long alloc_start = jiffies;
 	unsigned int stall_timeout = 10 * HZ;
+	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3528,6 +3529,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
 		gfp_mask &= ~__GFP_ATOMIC;
 
+retry_cpuset:
+	compaction_retries = 0;
+	no_progress_loops = 0;
+	compact_priority = DEF_COMPACT_PRIORITY;
+	cpuset_mems_cookie = read_mems_allowed_begin();
+
 	/*
 	 * The fast path uses conservative alloc_flags to succeed only until
 	 * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3699,6 +3706,15 @@ retry:
 	}
 
 nopage:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (read_mems_allowed_retry(cpuset_mems_cookie))
+		goto retry_cpuset;
+
 	warn_alloc(gfp_mask,
 			"page allocation failure: order:%u", order);
 got_pg:
@@ -3713,7 +3729,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	struct page *page;
-	unsigned int cpuset_mems_cookie;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = {
@@ -3750,9 +3765,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
-
 	/* Dirty zone balancing only done in the fast path */
 	ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
@@ -3765,6 +3777,11 @@ retry_cpuset:
 					ac.high_zoneidx, ac.nodemask);
 	if (!ac.preferred_zoneref->zone) {
 		page = NULL;
+		/*
+		 * This might be due to race with cpuset_current_mems_allowed
+		 * update, so make sure we retry with original nodemask in the
+		 * slow path.
+		 */
 		goto no_zone;
 	}
 
@@ -3773,6 +3790,7 @@ retry_cpuset:
 	if (likely(page))
 		goto out;
 
+no_zone:
 	/*
 	 * Runtime PM, block IO and its error handling path can deadlock
 	 * because I/O on the device might not complete.
@@ -3790,24 +3808,11 @@ retry_cpuset:
 		ac.nodemask = nodemask;
 		ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
 						ac.high_zoneidx, ac.nodemask);
-		if (!ac.preferred_zoneref->zone)
-			goto no_zone;
+		/* If we have NULL preferred zone, slowpath wll handle that */
 	}
 
 	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
-no_zone:
-	/*
-	 * When updating a task's mems_allowed, it is possible to race with
-	 * parallel threads in such a way that an allocation can fail while
-	 * the mask is being updated. If a page allocation is about to fail,
-	 * check if the cpuset changed during allocation and if so, retry.
-	 */
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-		alloc_mask = gfp_mask;
-		goto retry_cpuset;
-	}
-
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
 	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {