author		Mel Gorman <mgorman@suse.de>	2015-06-24 19:57:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-06-24 20:49:43 -0400
commit		eb3c24f305e56caaf5c4bd34d2923839688d470e (patch)
tree		37766a627064a5b6cce7c0160204b71a5a2e635b
parent		4165b9b46181290d7e6ac276080c89b65623c633 (diff)
mm, memcg: Try charging a page before setting page up to date
Historically memcg overhead was high even if memcg was unused.  This has
improved a lot but it still showed up in a profile summary as being a
problem.

/usr/src/linux-4.0-vanilla/mm/memcontrol.c                 6.6441   395842
  mem_cgroup_try_charge                                    2.950%   175781
  __mem_cgroup_count_vm_event                              1.431%    85239
  mem_cgroup_page_lruvec                                   0.456%    27156
  mem_cgroup_commit_charge                                 0.392%    23342
  uncharge_list                                            0.323%    19256
  mem_cgroup_update_lru_size                               0.278%    16538
  memcg_check_events                                       0.216%    12858
  mem_cgroup_charge_statistics.isra.22                     0.188%    11172
  try_charge                                               0.150%     8928
  commit_charge                                            0.141%     8388
  get_mem_cgroup_from_mm                                   0.121%     7184

This shows that 6.64% of system CPU cycles were spent in memcontrol.c,
dominated by mem_cgroup_try_charge.  The annotation shows that the bulk
of the cost was checking PageSwapCache, which is expected to be cache hot
but is very expensive.  The problem appears to be that __SetPageUptodate,
which issues a write barrier, is called just before the check.  The
barrier is required to make sure the struct page and page data are
written before the PTE is updated and the data becomes visible to
userspace.  memcg charging does not need the barrier but gets unfairly
hit with its cost, so this patch attempts the charge before the barrier.
Aside from removing the accidental cost to memcg, there is the added
benefit that the barrier is avoided entirely if the page cannot be
charged.

With the patch applied, the relevant profile summary is as follows.

/usr/src/linux-4.0-chargefirst-v2r1/mm/memcontrol.c        3.7907   223277
  __mem_cgroup_count_vm_event                              1.143%    67312
  mem_cgroup_page_lruvec                                   0.465%    27403
  mem_cgroup_commit_charge                                 0.381%    22452
  uncharge_list                                            0.332%    19543
  mem_cgroup_update_lru_size                               0.284%    16704
  get_mem_cgroup_from_mm                                   0.271%    15952
  mem_cgroup_try_charge                                    0.237%    13982
  memcg_check_events                                       0.222%    13058
  mem_cgroup_charge_statistics.isra.22                     0.185%    10920
  commit_charge                                            0.140%     8235
  try_charge                                               0.131%     7716

That brings the overhead down to 3.79% and leaves the memcg fault
accounting to the root cgroup, but it's an improvement.  The difference
in headline performance of the page fault microbenchmark is marginal as
memcg is such a small component of it.

pft faults
                                    4.0.0                  4.0.0
                                  vanilla            chargefirst
Hmean    faults/cpu-1 1443258.1051 (  0.00%) 1509075.7561 (  4.56%)
Hmean    faults/cpu-3 1340385.9270 (  0.00%) 1339160.7113 ( -0.09%)
Hmean    faults/cpu-5  875599.0222 (  0.00%)  874174.1255 ( -0.16%)
Hmean    faults/cpu-7  601146.6726 (  0.00%)  601370.9977 (  0.04%)
Hmean    faults/cpu-8  510728.2754 (  0.00%)  510598.8214 ( -0.03%)
Hmean    faults/sec-1 1432084.7845 (  0.00%) 1497935.5274 (  4.60%)
Hmean    faults/sec-3 3943818.1437 (  0.00%) 3941920.1520 ( -0.05%)
Hmean    faults/sec-5 3877573.5867 (  0.00%) 3869385.7553 ( -0.21%)
Hmean    faults/sec-7 3991832.0418 (  0.00%) 3992181.4189 (  0.01%)
Hmean    faults/sec-8 3987189.8167 (  0.00%) 3986452.2204 ( -0.02%)

The gain is only visible single-threaded.  The overhead is still present
at higher thread counts, but other factors dominate there.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
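For context on the reordering: the barrier the changelog refers to lives
inside __SetPageUptodate itself.  A minimal sketch of that helper, based
on include/linux/page-flags.h around v4.0 (simplified for illustration,
not the verbatim kernel text):

static inline void __SetPageUptodate(struct page *page)
{
	/*
	 * smp_wmb() orders all preceding stores to the page contents
	 * before PG_uptodate is set, so anyone who observes the flag
	 * is guaranteed to also see the page data.
	 */
	smp_wmb();
	__set_bit(PG_uptodate, &page->flags);
}

Moving mem_cgroup_try_charge ahead of this call keeps the charge path
from running immediately after the barrier, and a failed charge now skips
the barrier entirely.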
-rw-r--r--	mm/memory.c	10
1 files changed, 6 insertions, 4 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 17734c3c1183..11b9ca176740 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2081,11 +2081,12 @@ static int wp_page_copy(struct mm_struct *mm, struct vm_area_struct *vma,
 			goto oom;
 		cow_user_page(new_page, old_page, address, vma);
 	}
-	__SetPageUptodate(new_page);
 
 	if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
 		goto oom_free_new;
 
+	__SetPageUptodate(new_page);
+
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
 	/*
@@ -2689,6 +2690,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page = alloc_zeroed_user_highpage_movable(vma, address);
 	if (!page)
 		goto oom;
+
+	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
+		goto oom_free_page;
+
 	/*
 	 * The memory barrier inside __SetPageUptodate makes sure that
 	 * preceeding stores to the page contents become visible before
@@ -2696,9 +2701,6 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	__SetPageUptodate(page);
 
-	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
-		goto oom_free_page;
-
 	entry = mk_pte(page, vma->vm_page_prot);
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
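Taken together, the two do_anonymous_page hunks leave the fault path in
the following order (a simplified sketch assembled from the hunks above;
error labels and surrounding code elided):

	page = alloc_zeroed_user_highpage_movable(vma, address);
	if (!page)
		goto oom;

	/* Charge first: a failed charge no longer pays for the barrier. */
	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
		goto oom_free_page;

	/*
	 * The write barrier inside __SetPageUptodate makes preceding
	 * stores to the page contents visible before the PTE is set.
	 */
	__SetPageUptodate(page);

	entry = mk_pte(page, vma->vm_page_prot);
	if (vma->vm_flags & VM_WRITE)
		entry = pte_mkwrite(pte_mkdirty(entry));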