aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
author Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> 2012-03-21 19:34:28 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-03-21 20:55:02 -0400
commit 12724850e8064f64b6223d26d78c0597c742c65a (patch)
tree f0b91eeae069fe92906db7ad9d0d8a8003f05db4 /mm/memcontrol.c
parent d8c37c480678ebe09bc570f33e085e28049db035 (diff)
memcg: avoid THP split in task migration
Currently, task migration among memory cgroups cannot be done without splitting THPs, which means that processes using THP heavily experience a large overhead during task migration. This patch introduces code for moving the charge of a THP without splitting it, which makes THP more valuable.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Hillf Danton <dhillf@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 85
1 files changed, 77 insertions, 8 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c8d00a9780bc..b2ee6df0e9bb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5256,6 +5256,41 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
5256 return ret; 5256 return ret;
5257} 5257}
5258 5258
5259#ifdef CONFIG_TRANSPARENT_HUGEPAGE
5260/*
5261 * We don't consider swapping or file mapped pages because THP does not
5262 * support them for now.
5263 * Caller should make sure that pmd_trans_huge(pmd) is true.
5264 */
5265static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
5266 unsigned long addr, pmd_t pmd, union mc_target *target)
5267{
5268 struct page *page = NULL;
5269 struct page_cgroup *pc;
5270 enum mc_target_type ret = MC_TARGET_NONE;
5271
5272 page = pmd_page(pmd);
5273 VM_BUG_ON(!page || !PageHead(page));
5274 if (!move_anon())
5275 return ret;
5276 pc = lookup_page_cgroup(page);
5277 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
5278 ret = MC_TARGET_PAGE;
5279 if (target) {
5280 get_page(page);
5281 target->page = page;
5282 }
5283 }
5284 return ret;
5285}
5286#else
5287static inline enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
5288 unsigned long addr, pmd_t pmd, union mc_target *target)
5289{
5290 return MC_TARGET_NONE;
5291}
5292#endif
5293
5259static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, 5294static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
5260 unsigned long addr, unsigned long end, 5295 unsigned long addr, unsigned long end,
5261 struct mm_walk *walk) 5296 struct mm_walk *walk)
@@ -5264,9 +5299,12 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
5264 pte_t *pte; 5299 pte_t *pte;
5265 spinlock_t *ptl; 5300 spinlock_t *ptl;
5266 5301
5267 split_huge_page_pmd(walk->mm, pmd); 5302 if (pmd_trans_huge_lock(pmd, vma) == 1) {
5268 if (pmd_trans_unstable(pmd)) 5303 if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
5304 mc.precharge += HPAGE_PMD_NR;
5305 spin_unlock(&vma->vm_mm->page_table_lock);
5269 return 0; 5306 return 0;
5307 }
5270 5308
5271 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 5309 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
5272 for (; addr != end; pte++, addr += PAGE_SIZE) 5310 for (; addr != end; pte++, addr += PAGE_SIZE)
@@ -5425,18 +5463,49 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
5425 struct vm_area_struct *vma = walk->private; 5463 struct vm_area_struct *vma = walk->private;
5426 pte_t *pte; 5464 pte_t *pte;
5427 spinlock_t *ptl; 5465 spinlock_t *ptl;
5466 enum mc_target_type target_type;
5467 union mc_target target;
5468 struct page *page;
5469 struct page_cgroup *pc;
5428 5470
5429 split_huge_page_pmd(walk->mm, pmd); 5471 /*
5430 if (pmd_trans_unstable(pmd)) 5472 * We don't take compound_lock() here but no race with splitting thp
5473 * happens because:
5474 * - if pmd_trans_huge_lock() returns 1, the relevant thp is not
5475 * under splitting, which means there's no concurrent thp split,
5476 * - if another thread runs into split_huge_page() just after we
5477 * entered this if-block, the thread must wait for page table lock
5478 * to be unlocked in __split_huge_page_splitting(), where the main
5479 * part of thp split is not executed yet.
5480 */
5481 if (pmd_trans_huge_lock(pmd, vma) == 1) {
5482 if (!mc.precharge) {
5483 spin_unlock(&vma->vm_mm->page_table_lock);
5484 return 0;
5485 }
5486 target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
5487 if (target_type == MC_TARGET_PAGE) {
5488 page = target.page;
5489 if (!isolate_lru_page(page)) {
5490 pc = lookup_page_cgroup(page);
5491 if (!mem_cgroup_move_account(page, HPAGE_PMD_NR,
5492 pc, mc.from, mc.to,
5493 false)) {
5494 mc.precharge -= HPAGE_PMD_NR;
5495 mc.moved_charge += HPAGE_PMD_NR;
5496 }
5497 putback_lru_page(page);
5498 }
5499 put_page(page);
5500 }
5501 spin_unlock(&vma->vm_mm->page_table_lock);
5431 return 0; 5502 return 0;
5503 }
5504
5432retry: 5505retry:
5433 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 5506 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
5434 for (; addr != end; addr += PAGE_SIZE) { 5507 for (; addr != end; addr += PAGE_SIZE) {
5435 pte_t ptent = *(pte++); 5508 pte_t ptent = *(pte++);
5436 union mc_target target;
5437 int type;
5438 struct page *page;
5439 struct page_cgroup *pc;
5440 swp_entry_t ent; 5509 swp_entry_t ent;
5441 5510
5442 if (!mc.precharge) 5511 if (!mc.precharge)