aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2009-12-14 21:00:01 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-15 11:53:24 -0500
commit5dc37642cbce34619e4588a9f0bdad1d2f870956 (patch)
tree00165a3a0b3d768abaf83cf44642f09398b989cc
parentd33b9f45bd24a6391bc05e2b5a13c1b5787ca9c2 (diff)
mm hugetlb: add hugepage support to pagemap
This patch enables extraction of the pfn of a hugepage from
/proc/pid/pagemap in an architecture independent manner.

Details
-------
My test program (leak_pagemap) works as follows:
 - creat() and mmap() a file on hugetlbfs (file size is 200MB == 100 hugepages),
 - read()/write() something on it,
 - call page-types with option -p,
 - munmap() and unlink() the file on hugetlbfs

Without my patches
------------------
$ ./leak_pagemap
             flags  page-count   MB  symbolic-flags                      long-symbolic-flags
0x0000000000000000           1    0  __________________________________
0x0000000000000804           1    0  __R________M______________________  referenced,mmap
0x000000000000086c          81    0  __RU_lA____M______________________  referenced,uptodate,lru,active,mmap
0x0000000000005808           5    0  ___U_______Ma_b___________________  uptodate,mmap,anonymous,swapbacked
0x0000000000005868          12    0  ___U_lA____Ma_b___________________  uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c           1    0  __RU_lA____Ma_b___________________  referenced,uptodate,lru,active,mmap,anonymous,swapbacked
             total         101    0

The output of page-types doesn't show any hugepages.
With my patches
---------------
$ ./leak_pagemap
             flags  page-count   MB  symbolic-flags                      long-symbolic-flags
0x0000000000000000           1    0  __________________________________
0x0000000000030000       51100  199  ________________TG________________  compound_tail,huge
0x0000000000028018         100    0  ___UD__________H_G________________  uptodate,dirty,compound_head,huge
0x0000000000000804           1    0  __R________M______________________  referenced,mmap
0x000000000000080c           1    0  __RU_______M______________________  referenced,uptodate,mmap
0x000000000000086c          80    0  __RU_lA____M______________________  referenced,uptodate,lru,active,mmap
0x0000000000005808           4    0  ___U_______Ma_b___________________  uptodate,mmap,anonymous,swapbacked
0x0000000000005868          12    0  ___U_lA____Ma_b___________________  uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c           1    0  __RU_lA____Ma_b___________________  referenced,uptodate,lru,active,mmap,anonymous,swapbacked
             total       51300  200

The output of page-types shows 51200 pages contributing to hugepages,
containing 100 head pages and 51100 tail pages as expected.

[akpm@linux-foundation.org: build fix]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/proc/task_mmu.c45
-rw-r--r--include/linux/mm.h3
-rw-r--r--mm/pagewalk.c22
3 files changed, 67 insertions, 3 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 2a1bef9203c6..47c03f4336b8 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -650,6 +650,50 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
650 return err; 650 return err;
651} 651}
652 652
653static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
654{
655 u64 pme = 0;
656 if (pte_present(pte))
657 pme = PM_PFRAME(pte_pfn(pte) + offset)
658 | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
659 return pme;
660}
661
662static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
663 unsigned long end, struct mm_walk *walk)
664{
665 struct vm_area_struct *vma;
666 struct pagemapread *pm = walk->private;
667 struct hstate *hs = NULL;
668 int err = 0;
669
670 vma = find_vma(walk->mm, addr);
671 if (vma)
672 hs = hstate_vma(vma);
673 for (; addr != end; addr += PAGE_SIZE) {
674 u64 pfn = PM_NOT_PRESENT;
675
676 if (vma && (addr >= vma->vm_end)) {
677 vma = find_vma(walk->mm, addr);
678 if (vma)
679 hs = hstate_vma(vma);
680 }
681
682 if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) {
683 /* calculate pfn of the "raw" page in the hugepage. */
684 int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT;
685 pfn = huge_pte_to_pagemap_entry(*pte, offset);
686 }
687 err = add_to_pagemap(addr, pfn, pm);
688 if (err)
689 return err;
690 }
691
692 cond_resched();
693
694 return err;
695}
696
653/* 697/*
654 * /proc/pid/pagemap - an array mapping virtual pages to pfns 698 * /proc/pid/pagemap - an array mapping virtual pages to pfns
655 * 699 *
@@ -742,6 +786,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
742 786
743 pagemap_walk.pmd_entry = pagemap_pte_range; 787 pagemap_walk.pmd_entry = pagemap_pte_range;
744 pagemap_walk.pte_hole = pagemap_pte_hole; 788 pagemap_walk.pte_hole = pagemap_pte_hole;
789 pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
745 pagemap_walk.mm = mm; 790 pagemap_walk.mm = mm;
746 pagemap_walk.private = &pm; 791 pagemap_walk.private = &pm;
747 792
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 52b264563cd9..9d65ae4ba0e0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -770,6 +770,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlb,
770 * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry 770 * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
771 * @pte_entry: if set, called for each non-empty PTE (4th-level) entry 771 * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
772 * @pte_hole: if set, called for each hole at all levels 772 * @pte_hole: if set, called for each hole at all levels
773 * @hugetlb_entry: if set, called for each hugetlb entry
773 * 774 *
774 * (see walk_page_range for more details) 775 * (see walk_page_range for more details)
775 */ 776 */
@@ -779,6 +780,8 @@ struct mm_walk {
779 int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *); 780 int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
780 int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *); 781 int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
781 int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *); 782 int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
783 int (*hugetlb_entry)(pte_t *, unsigned long, unsigned long,
784 struct mm_walk *);
782 struct mm_struct *mm; 785 struct mm_struct *mm;
783 void *private; 786 void *private;
784}; 787};
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index a286915e23ef..7b47a57b6646 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -120,15 +120,31 @@ int walk_page_range(unsigned long addr, unsigned long end,
120 do { 120 do {
121 next = pgd_addr_end(addr, end); 121 next = pgd_addr_end(addr, end);
122 122
123 /* skip hugetlb vma to avoid hugepage PMD being cleared 123 /*
124 * in pmd_none_or_clear_bad(). */ 124 * handle hugetlb vma individually because pagetable walk for
125 * the hugetlb page is dependent on the architecture and
126 * we can't handle it in the same manner as non-huge pages.
127 */
125 vma = find_vma(walk->mm, addr); 128 vma = find_vma(walk->mm, addr);
129#ifdef CONFIG_HUGETLB_PAGE
126 if (vma && is_vm_hugetlb_page(vma)) { 130 if (vma && is_vm_hugetlb_page(vma)) {
131 pte_t *pte;
132 struct hstate *hs;
133
127 if (vma->vm_end < next) 134 if (vma->vm_end < next)
128 next = vma->vm_end; 135 next = vma->vm_end;
136 hs = hstate_vma(vma);
137 pte = huge_pte_offset(walk->mm,
138 addr & huge_page_mask(hs));
139 if (pte && !huge_pte_none(huge_ptep_get(pte))
140 && walk->hugetlb_entry)
141 err = walk->hugetlb_entry(pte, addr,
142 next, walk);
143 if (err)
144 break;
129 continue; 145 continue;
130 } 146 }
131 147#endif
132 if (pgd_none_or_clear_bad(pgd)) { 148 if (pgd_none_or_clear_bad(pgd)) {
133 if (walk->pte_hole) 149 if (walk->pte_hole)
134 err = walk->pte_hole(addr, next, walk); 150 err = walk->pte_hole(addr, next, walk);