author     Vlastimil Babka <vbabka@suse.cz>                  2017-01-24 18:18:38 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>    2017-01-24 19:26:14 -0500
commit     5ce9bfef1d27944c119a397a9d827bef795487ce (patch)
tree       ac3dab0f4c7c9807fc01ca208fc5735011308621 /mm/page_alloc.c
parent     16096c25bf0ca5d87e4fa6ec6108ba53feead212 (diff)
mm, page_alloc: move cpuset seqcount checking to slowpath
This is a preparation for the following patch to make review simpler.
While the primary motivation is a bug fix, this also simplifies the fast
path, although the moved code is only enabled when cpusets are in use.
Link: http://lkml.kernel.org/r/20170120103843.24587-4-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Ganapatrao Kulkarni <gpkulkarni@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 47
1 file changed, 26 insertions, 21 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6f28b7e926d1..0df3c089d3af 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3523,12 +3523,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	struct page *page = NULL;
 	unsigned int alloc_flags;
 	unsigned long did_some_progress;
-	enum compact_priority compact_priority = DEF_COMPACT_PRIORITY;
+	enum compact_priority compact_priority;
 	enum compact_result compact_result;
-	int compaction_retries = 0;
-	int no_progress_loops = 0;
+	int compaction_retries;
+	int no_progress_loops;
 	unsigned long alloc_start = jiffies;
 	unsigned int stall_timeout = 10 * HZ;
+	unsigned int cpuset_mems_cookie;
 
 	/*
 	 * In the slowpath, we sanity check order to avoid ever trying to
@@ -3549,6 +3550,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 				(__GFP_ATOMIC|__GFP_DIRECT_RECLAIM)))
 		gfp_mask &= ~__GFP_ATOMIC;
 
+retry_cpuset:
+	compaction_retries = 0;
+	no_progress_loops = 0;
+	compact_priority = DEF_COMPACT_PRIORITY;
+	cpuset_mems_cookie = read_mems_allowed_begin();
+
 	/*
 	 * The fast path uses conservative alloc_flags to succeed only until
 	 * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -3720,6 +3727,15 @@ retry:
 	}
 
 nopage:
+	/*
+	 * When updating a task's mems_allowed, it is possible to race with
+	 * parallel threads in such a way that an allocation can fail while
+	 * the mask is being updated. If a page allocation is about to fail,
+	 * check if the cpuset changed during allocation and if so, retry.
+	 */
+	if (read_mems_allowed_retry(cpuset_mems_cookie))
+		goto retry_cpuset;
+
 	warn_alloc(gfp_mask,
 			"page allocation failure: order:%u", order);
 got_pg:
@@ -3734,7 +3750,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 			struct zonelist *zonelist, nodemask_t *nodemask)
 {
 	struct page *page;
-	unsigned int cpuset_mems_cookie;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_mask = gfp_mask; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = {
@@ -3771,9 +3786,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE)
 		alloc_flags |= ALLOC_CMA;
 
-retry_cpuset:
-	cpuset_mems_cookie = read_mems_allowed_begin();
-
 	/* Dirty zone balancing only done in the fast path */
 	ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 
@@ -3786,6 +3798,11 @@ retry_cpuset:
 					ac.high_zoneidx, ac.nodemask);
 	if (!ac.preferred_zoneref->zone) {
 		page = NULL;
+		/*
+		 * This might be due to race with cpuset_current_mems_allowed
+		 * update, so make sure we retry with original nodemask in the
+		 * slow path.
+		 */
 		goto no_zone;
 	}
 
@@ -3794,6 +3811,7 @@ retry_cpuset:
 	if (likely(page))
 		goto out;
 
+no_zone:
 	/*
 	 * Runtime PM, block IO and its error handling path can deadlock
 	 * because I/O on the device might not complete.
@@ -3811,24 +3829,11 @@ retry_cpuset:
 		ac.nodemask = nodemask;
 		ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
 						ac.high_zoneidx, ac.nodemask);
-		if (!ac.preferred_zoneref->zone)
-			goto no_zone;
+		/* If we have NULL preferred zone, slowpath wll handle that */
 	}
 
 	page = __alloc_pages_slowpath(alloc_mask, order, &ac);
 
-no_zone:
-	/*
-	 * When updating a task's mems_allowed, it is possible to race with
-	 * parallel threads in such a way that an allocation can fail while
-	 * the mask is being updated. If a page allocation is about to fail,
-	 * check if the cpuset changed during allocation and if so, retry.
-	 */
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) {
-		alloc_mask = gfp_mask;
-		goto retry_cpuset;
-	}
-
 out:
 	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
 	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
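
The retry the patch relocates follows the cpuset mems_allowed seqcount pattern: take a cookie with read_mems_allowed_begin() before attempting the allocation and, only if the attempt is about to fail, use read_mems_allowed_retry() to check whether mems_allowed changed underneath the allocation and restart if so. After this patch the cookie is taken at retry_cpuset: inside __alloc_pages_slowpath() (together with the reset of the compaction and reclaim retry counters) and is only consulted on the nopage: failure path, so the fast path in __alloc_pages_nodemask() no longer carries the retry logic. A minimal userspace sketch of that begin/retry idea follows; the names (mask_read_begin, mask_read_retry, mask_update, try_alloc) are hypothetical stand-ins, not the kernel API, and the writer side is simplified to a single post-update counter bump rather than a full seqcount.

```c
/*
 * Illustrative userspace model of the begin/retry pattern above
 * (hypothetical names; NOT the kernel's read_mems_allowed_* code).
 * The writer bumps a sequence counter after updating the shared mask;
 * the reader snapshots the counter first and, only when its operation
 * is about to fail, checks whether the counter moved and retries.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned int mask_seq;            /* bumped after every mask update */
static _Atomic unsigned long mems_mask = 0x1;    /* toy stand-in for mems_allowed */

static unsigned int mask_read_begin(void)
{
	return atomic_load_explicit(&mask_seq, memory_order_acquire);
}

static bool mask_read_retry(unsigned int cookie)
{
	return atomic_load_explicit(&mask_seq, memory_order_acquire) != cookie;
}

static void mask_update(unsigned long new_mask)  /* writer side */
{
	atomic_store_explicit(&mems_mask, new_mask, memory_order_release);
	atomic_fetch_add_explicit(&mask_seq, 1, memory_order_release);
}

/* Reader: a failing "allocation" is only reported if the mask was stable. */
static bool try_alloc(void)
{
	unsigned int cookie;
	unsigned long mask;

retry:
	cookie = mask_read_begin();
	mask = atomic_load_explicit(&mems_mask, memory_order_acquire);

	if (mask == 0) {                     /* "allocation" failed */
		if (mask_read_retry(cookie)) /* raced with an update? */
			goto retry;
		return false;                /* genuine failure */
	}
	return true;
}

int main(void)
{
	mask_update(0x3);
	printf("alloc %s\n", try_alloc() ? "succeeded" : "failed");
	return 0;
}
```

The design point mirrored here is the one the patch relies on: the cookie check is deliberately kept off the success path and is evaluated only when the operation would otherwise fail, so the common case pays nothing for the race protection.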