author    Johannes Weiner <hannes@cmpxchg.org>           2014-08-08 17:19:20 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-08-08 18:57:17 -0400
commit    00501b531c4723972aa11d6d4ebcf8d6552007c8 (patch)
tree      b3ad4850d58f137cf87b8424412d962fb251839f /kernel/events/uprobes.c
parent    4449a51a7c281602d3a385044ab928322a122a02 (diff)
mm: memcontrol: rewrite charge API
These patches rework memcg charge lifetime to integrate more naturally with the lifetime of user pages.  This drastically simplifies the code and reduces charging and uncharging overhead.  The most expensive part of charging and uncharging is the page_cgroup bit spinlock, which is removed entirely after this series.

Here are the top-10 profile entries of a stress test that reads a 128G sparse file on a freshly booted box, without even a dedicated cgroup (i.e. executing in the root memcg).

Before:

    15.36%      cat  [kernel.kallsyms]   [k] copy_user_generic_string
    13.31%      cat  [kernel.kallsyms]   [k] memset
    11.48%      cat  [kernel.kallsyms]   [k] do_mpage_readpage
     4.23%      cat  [kernel.kallsyms]   [k] get_page_from_freelist
     2.38%      cat  [kernel.kallsyms]   [k] put_page
     2.32%      cat  [kernel.kallsyms]   [k] __mem_cgroup_commit_charge
     2.18%  kswapd0  [kernel.kallsyms]   [k] __mem_cgroup_uncharge_common
     1.92%  kswapd0  [kernel.kallsyms]   [k] shrink_page_list
     1.86%      cat  [kernel.kallsyms]   [k] __radix_tree_lookup
     1.62%      cat  [kernel.kallsyms]   [k] __pagevec_lru_add_fn

After:

    15.67%      cat  [kernel.kallsyms]   [k] copy_user_generic_string
    13.48%      cat  [kernel.kallsyms]   [k] memset
    11.42%      cat  [kernel.kallsyms]   [k] do_mpage_readpage
     3.98%      cat  [kernel.kallsyms]   [k] get_page_from_freelist
     2.46%      cat  [kernel.kallsyms]   [k] put_page
     2.13%  kswapd0  [kernel.kallsyms]   [k] shrink_page_list
     1.88%      cat  [kernel.kallsyms]   [k] __radix_tree_lookup
     1.67%      cat  [kernel.kallsyms]   [k] __pagevec_lru_add_fn
     1.39%  kswapd0  [kernel.kallsyms]   [k] free_pcppages_bulk
     1.30%      cat  [kernel.kallsyms]   [k] kfree

As you can see, the memcg footprint has shrunk quite a bit.

   text    data     bss     dec     hex filename
  37970    9892     400   48262    bc86 mm/memcontrol.o.old
  35239    9892     400   45531    b1db mm/memcontrol.o

This patch (of 4):

The memcg charge API charges pages before they are rmapped - i.e. have an actual "type" - and so every callsite needs its own set of charge and uncharge functions to know what type is being operated on.  Worse, uncharge has to happen from a context that is still type-specific, rather than at the end of the page's lifetime with exclusive access, and so requires a lot of synchronization.

Rewrite the charge API to provide a generic set of try_charge(), commit_charge() and cancel_charge() transaction operations, much like what's currently done for swap-in:

  mem_cgroup_try_charge() attempts to reserve a charge, reclaiming
  pages from the memcg if necessary.

  mem_cgroup_commit_charge() commits the page to the charge once it
  has a valid page->mapping and PageAnon() reliably tells the type.

  mem_cgroup_cancel_charge() aborts the transaction.

This reduces the charge API and enables subsequent patches to drastically simplify uncharging.

As pages need to be committed after rmap is established but before they are added to the LRU, page_add_new_anon_rmap() must stop doing LRU additions again.  Revive lru_cache_add_active_or_unevictable().

[hughd@google.com: fix shmem_unuse]
[hughd@google.com: Add comments on the private use of -EAGAIN]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
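As a condensed illustration of the three-step transaction described above, charging a new anonymous page under the rewritten API looks roughly like the sketch below.  It is modeled on the __replace_page() hunk further down; the wrapper function and its error handling are hypothetical, not part of this patch.

#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/rmap.h>
#include <linux/swap.h>    /* lru_cache_add_active_or_unevictable() */

/* Illustrative wrapper only; not taken from this patch. */
static int example_charge_new_anon_page(struct page *page,
                                        struct vm_area_struct *vma,
                                        unsigned long addr)
{
        struct mem_cgroup *memcg;
        int err;

        /* Reserve a charge; may reclaim from the memcg and may fail. */
        err = mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg);
        if (err)
                return err;

        /*
         * Give the page its type first: rmap sets up page->mapping so
         * that PageAnon() is reliable ...
         */
        page_add_new_anon_rmap(page, vma, addr);
        /* ... then bind the reserved charge to the now-typed page ... */
        mem_cgroup_commit_charge(page, memcg, false);
        /* ... and only after the commit add it to the LRU. */
        lru_cache_add_active_or_unevictable(page, vma);

        return 0;
}

An error path taken before the commit step would instead drop the reservation with mem_cgroup_cancel_charge(page, memcg).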
Diffstat (limited to 'kernel/events/uprobes.c')
-rw-r--r--  kernel/events/uprobes.c  15
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e8c137..1d0af8a2c646 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	/* For mmu_notifiers */
 	const unsigned long mmun_start = addr;
 	const unsigned long mmun_end = addr + PAGE_SIZE;
+	struct mem_cgroup *memcg;
+
+	err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
+	if (err)
+		return err;
 
 	/* For try_to_free_swap() and munlock_vma_page() below */
 	lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr);
+	mem_cgroup_commit_charge(kpage, memcg, false);
+	lru_cache_add_active_or_unevictable(kpage, vma);
 
 	if (!PageAnon(page)) {
 		dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	err = 0;
  unlock:
+	mem_cgroup_cancel_charge(kpage, memcg);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;
@@ -315,18 +323,11 @@ retry:
 	if (!new_page)
 		goto put_old;
 
-	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
-		goto put_new;
-
 	__SetPageUptodate(new_page);
 	copy_highpage(new_page, old_page);
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	ret = __replace_page(vma, vaddr, old_page, new_page);
-	if (ret)
-		mem_cgroup_uncharge_page(new_page);
-
-put_new:
 	page_cache_release(new_page);
 put_old:
 	put_page(old_page);