author     Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>      2012-03-21 19:33:57 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-03-21 20:54:56 -0400
commit     5aaabe831eb527e0d9284f0745d830a755f70393 (patch)
tree       2efdfe775da1c727c60ef5bd2d865d2150f68381 /fs
parent     b716ad953a2bc4a543143c1d9836b7007a4b182f (diff)
pagemap: avoid splitting thp when reading /proc/pid/pagemap
Thp split is not necessary if we explicitly check whether pmds are mapping
thps or not. This patch introduces this check and adds code to generate
pagemap entries for pmds mapping thps, which results in less performance
impact of pagemap on thp.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
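For reference, user space consumes the entries this walker produces by seeking
to (virtual address / page size) * 8 in /proc/<pid>/pagemap and reading one
64-bit entry per page. The sketch below is not part of the patch; it is a
minimal reader, assuming the pagemap entry layout of this kernel series
(bit 63 = PM_PRESENT, bits 0-54 = page frame number), shown only to
illustrate what the THP path added below fills in.

/*
 * pagemap-dump.c -- minimal sketch of a /proc/<pid>/pagemap reader.
 * Not part of the patch; assumes the entry layout of this kernel
 * series: bit 63 = present, bits 0..54 = page frame number.
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        char path[64];
        uint64_t entry;
        unsigned long vaddr;
        long pagesize = sysconf(_SC_PAGESIZE);
        int fd;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <pid> <hex vaddr>\n", argv[0]);
                return 1;
        }
        vaddr = strtoul(argv[2], NULL, 16);
        snprintf(path, sizeof(path), "/proc/%s/pagemap", argv[1]);
        fd = open(path, O_RDONLY);
        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* one u64 entry per virtual page */
        if (pread(fd, &entry, sizeof(entry),
                  (off_t)(vaddr / pagesize) * sizeof(entry)) != sizeof(entry)) {
                perror("pread");
                close(fd);
                return 1;
        }
        if (entry & (1ULL << 63))       /* PM_PRESENT */
                printf("0x%lx -> pfn 0x%llx\n", vaddr,
                       (unsigned long long)(entry & ((1ULL << 55) - 1)));
        else
                printf("0x%lx not present\n", vaddr);
        close(fd);
        return 0;
}

With this patch applied, pointing such a reader at a THP-backed address
reports a per-page PFN without the huge page being split first.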
Diffstat (limited to 'fs')
-rw-r--r--  fs/proc/task_mmu.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 48 insertions(+), 4 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3efa7253523e..95264c0ef308 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -608,6 +608,9 @@ struct pagemapread {
         u64 *buffer;
 };
 
+#define PAGEMAP_WALK_SIZE       (PMD_SIZE)
+#define PAGEMAP_WALK_MASK       (PMD_MASK)
+
 #define PM_ENTRY_BYTES      sizeof(u64)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
@@ -666,6 +669,27 @@ static u64 pte_to_pagemap_entry(pte_t pte)
         return pme;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
+{
+        u64 pme = 0;
+        /*
+         * Currently pmd for thp is always present because thp can not be
+         * swapped-out, migrated, or HWPOISONed (split in such cases instead.)
+         * This if-check is just to prepare for future implementation.
+         */
+        if (pmd_present(pmd))
+                pme = PM_PFRAME(pmd_pfn(pmd) + offset)
+                        | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
+        return pme;
+}
+#else
+static inline u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
+{
+        return 0;
+}
+#endif
+
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
                              struct mm_walk *walk)
 {
@@ -673,15 +697,37 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
         struct pagemapread *pm = walk->private;
         pte_t *pte;
         int err = 0;
+        u64 pfn = PM_NOT_PRESENT;
 
-        split_huge_page_pmd(walk->mm, pmd);
         if (pmd_trans_unstable(pmd))
                 return 0;
 
         /* find the first VMA at or above 'addr' */
         vma = find_vma(walk->mm, addr);
+        spin_lock(&walk->mm->page_table_lock);
+        if (pmd_trans_huge(*pmd)) {
+                if (pmd_trans_splitting(*pmd)) {
+                        spin_unlock(&walk->mm->page_table_lock);
+                        wait_split_huge_page(vma->anon_vma, pmd);
+                } else {
+                        for (; addr != end; addr += PAGE_SIZE) {
+                                unsigned long offset;
+
+                                offset = (addr & ~PAGEMAP_WALK_MASK) >>
+                                                PAGE_SHIFT;
+                                pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
+                                err = add_to_pagemap(addr, pfn, pm);
+                                if (err)
+                                        break;
+                        }
+                        spin_unlock(&walk->mm->page_table_lock);
+                        return err;
+                }
+        } else {
+                spin_unlock(&walk->mm->page_table_lock);
+        }
+
         for (; addr != end; addr += PAGE_SIZE) {
-                u64 pfn = PM_NOT_PRESENT;
 
                 /* check to see if we've left 'vma' behind
                  * and need a new, higher one */
@@ -764,8 +810,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-#define PAGEMAP_WALK_SIZE       (PMD_SIZE)
-#define PAGEMAP_WALK_MASK       (PMD_MASK)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
                             size_t count, loff_t *ppos)
 {