path: root/mm/page_alloc.c
author		Mel Gorman <mgorman@techsingularity.net>	2019-03-05 18:45:41 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-03-06 00:07:17 -0500
commit		5e1f0f098b4649fad53011246bcaeff011ffdf5d (patch)
tree		88e1399008fd89a44a52b3dabb6da42eef15da05 /mm/page_alloc.c
parent		e332f741a8dd1ec9a6dc8aa997296ecbfe64323e (diff)
mm, compaction: capture a page under direct compaction
Compaction is inherently race-prone as a suitable page freed during
compaction can be allocated by any parallel task.  This patch uses a
capture_control structure to isolate a page immediately when it is freed
by a direct compactor in the slow path of the page allocator.  The intent
is to avoid redundant scanning.

                                     5.0.0-rc1              5.0.0-rc1
                               selective-v3r17          capture-v3r19
Amean     fault-both-1         0.00 (   0.00%)        0.00 *   0.00%*
Amean     fault-both-3      2582.11 (   0.00%)     2563.68 (   0.71%)
Amean     fault-both-5      4500.26 (   0.00%)     4233.52 (   5.93%)
Amean     fault-both-7      5819.53 (   0.00%)     6333.65 (  -8.83%)
Amean     fault-both-12     9321.18 (   0.00%)     9759.38 (  -4.70%)
Amean     fault-both-18     9782.76 (   0.00%)    10338.76 (  -5.68%)
Amean     fault-both-24    15272.81 (   0.00%)    13379.55 *  12.40%*
Amean     fault-both-30    15121.34 (   0.00%)    16158.25 (  -6.86%)
Amean     fault-both-32    18466.67 (   0.00%)    18971.21 (  -2.73%)

Latency is only moderately affected but the devil is in the details.  A
closer examination indicates that base page fault latency is reduced but
latency of huge pages is increased as it takes greater care to succeed.
Part of the "problem" is that allocation success rates are close to 100%
even when under pressure and compaction gets harder:

                                 5.0.0-rc1              5.0.0-rc1
                           selective-v3r17          capture-v3r19
Percentage huge-3        96.70 (   0.00%)       98.23 (   1.58%)
Percentage huge-5        96.99 (   0.00%)       95.30 (  -1.75%)
Percentage huge-7        94.19 (   0.00%)       97.24 (   3.24%)
Percentage huge-12       94.95 (   0.00%)       97.35 (   2.53%)
Percentage huge-18       96.74 (   0.00%)       97.30 (   0.58%)
Percentage huge-24       97.07 (   0.00%)       97.55 (   0.50%)
Percentage huge-30       95.69 (   0.00%)       98.50 (   2.95%)
Percentage huge-32       96.70 (   0.00%)       99.27 (   2.65%)

And scan rates are reduced as expected by 6% for the migration scanner
and 29% for the free scanner, indicating that there is less redundant work.

Compaction migrate scanned    20815362    19573286
Compaction free scanned       16352612    11510663

[mgorman@techsingularity.net: remove redundant check]
  Link: http://lkml.kernel.org/r/20190201143853.GH9565@techsingularity.net
Link: http://lkml.kernel.org/r/20190118175136.31341-23-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
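For orientation, here is a rough sketch of the mechanism the message describes,
paraphrased from the rest of the series: struct capture_control is defined in
mm/internal.h and the compaction-side wiring lives in mm/compaction.c, neither
of which is part of the mm/page_alloc.c diff below.  The helper name
compact_and_capture() is made up for this sketch (upstream the equivalent
logic sits in compact_zone_order()), so treat it as an illustration of the
handshake rather than the exact upstream code.

/*
 * Sketch of struct capture_control as added by this series in mm/internal.h:
 * the order being compacted for is reachable via capc->cc->order, and
 * capc->page receives the captured page.
 */
struct capture_control {
	struct compact_control *cc;
	struct page *page;
};

/*
 * Simplified compaction-side wiring (hypothetical helper for illustration).
 * The direct compactor publishes a capture_control on current before running
 * compact_zone(); if __free_one_page() frees a page of the right order in the
 * meantime, compaction_capture() stores it in capc.page instead of returning
 * it to the free lists, and the caller gets it back through *capture.
 */
static enum compact_result compact_and_capture(struct compact_control *cc,
					       struct page **capture)
{
	struct capture_control capc = {
		.cc = cc,
		.page = NULL,
	};
	enum compact_result ret;

	if (capture)
		current->capture_control = &capc;

	ret = compact_zone(cc, &capc);

	/* Tear down the capture window and hand back any captured page. */
	current->capture_control = NULL;
	if (capture)
		*capture = capc.page;

	return ret;
}

The mm/page_alloc.c hunks below are the other half of that handshake:
task_capc() looks up the published capture_control, compaction_capture()
fills it from __free_one_page() before the page is merged back into the
buddy lists, and __alloc_pages_direct_compact() preps a captured page before
falling back to get_page_from_freelist().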
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	73
1 file changed, 69 insertions(+), 4 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2e132b9e7a93..09bf2c5f8b4b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -789,6 +789,57 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
 	return 0;
 }
 
+#ifdef CONFIG_COMPACTION
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	struct capture_control *capc = current->capture_control;
+
+	return capc &&
+		!(current->flags & PF_KTHREAD) &&
+		!capc->page &&
+		capc->cc->zone == zone &&
+		capc->cc->direct_compaction ? capc : NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	if (!capc || order != capc->cc->order)
+		return false;
+
+	/* Do not accidentally pollute CMA or isolated regions */
+	if (is_migrate_cma(migratetype) ||
+	    is_migrate_isolate(migratetype))
+		return false;
+
+	/*
+	 * Do not let lower order allocations pollute a movable pageblock.
+	 * This might let an unmovable request use a reclaimable pageblock
+	 * and vice-versa but no more than normal fallback logic which can
+	 * have trouble finding a high-order free page.
+	 */
+	if (order < pageblock_order && migratetype == MIGRATE_MOVABLE)
+		return false;
+
+	capc->page = page;
+	return true;
+}
+
+#else
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+	return NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page,
+		   int order, int migratetype)
+{
+	return false;
+}
+#endif /* CONFIG_COMPACTION */
+
 /*
  * Freeing function for a buddy system allocator.
  *
@@ -822,6 +873,7 @@ static inline void __free_one_page(struct page *page,
 	unsigned long uninitialized_var(buddy_pfn);
 	struct page *buddy;
 	unsigned int max_order;
+	struct capture_control *capc = task_capc(zone);
 
 	max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
 
@@ -837,6 +889,11 @@ static inline void __free_one_page(struct page *page,
 
 continue_merging:
 	while (order < max_order - 1) {
+		if (compaction_capture(capc, page, order, migratetype)) {
+			__mod_zone_freepage_state(zone, -(1 << order),
+								migratetype);
+			return;
+		}
 		buddy_pfn = __find_buddy_pfn(pfn, order);
 		buddy = page + (buddy_pfn - pfn);
 
@@ -3710,7 +3767,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		unsigned int alloc_flags, const struct alloc_context *ac,
 		enum compact_priority prio, enum compact_result *compact_result)
 {
-	struct page *page;
+	struct page *page = NULL;
 	unsigned long pflags;
 	unsigned int noreclaim_flag;
 
@@ -3721,13 +3778,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	noreclaim_flag = memalloc_noreclaim_save();
 
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
-								prio);
+								prio, &page);
 
 	memalloc_noreclaim_restore(noreclaim_flag);
 	psi_memstall_leave(&pflags);
 
-	if (*compact_result <= COMPACT_INACTIVE)
+	if (*compact_result <= COMPACT_INACTIVE) {
+		WARN_ON_ONCE(page);
 		return NULL;
+	}
 
 	/*
 	 * At least in one zone compaction wasn't deferred or skipped, so let's
@@ -3735,7 +3794,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	 */
 	count_vm_event(COMPACTSTALL);
 
-	page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+	/* Prep a captured page if available */
+	if (page)
+		prep_new_page(page, order, gfp_mask, alloc_flags);
+
+	/* Try to get a page from the freelist if available */
+	if (!page)
+		page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
 
 	if (page) {
 		struct zone *zone = page_zone(page);