diff options
author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2009-12-14 21:00:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-12-15 11:53:24 -0500 |
commit | 5dc37642cbce34619e4588a9f0bdad1d2f870956 (patch) | |
tree | 00165a3a0b3d768abaf83cf44642f09398b989cc | |
parent | d33b9f45bd24a6391bc05e2b5a13c1b5787ca9c2 (diff) |
mm hugetlb: add hugepage support to pagemap
This patch enables extraction of the pfn of a hugepage from
/proc/pid/pagemap in an architecture independent manner.
Details
-------
My test program (leak_pagemap) works as follows:
- creat() and mmap() a file on hugetlbfs (file size is 200MB == 100 hugepages),
- read()/write() something on it,
- call page-types with option -p,
- munmap() and unlink() the file on hugetlbfs
Without my patches
------------------
$ ./leak_pagemap
flags page-count MB symbolic-flags long-symbolic-flags
0x0000000000000000 1 0 __________________________________
0x0000000000000804 1 0 __R________M______________________ referenced,mmap
0x000000000000086c 81 0 __RU_lA____M______________________ referenced,uptodate,lru,active,mmap
0x0000000000005808 5 0 ___U_______Ma_b___________________ uptodate,mmap,anonymous,swapbacked
0x0000000000005868 12 0 ___U_lA____Ma_b___________________ uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c 1 0 __RU_lA____Ma_b___________________ referenced,uptodate,lru,active,mmap,anonymous,swapbacked
total 101 0
The output of page-types doesn't show any hugepages.
With my patches
---------------
$ ./leak_pagemap
flags page-count MB symbolic-flags long-symbolic-flags
0x0000000000000000 1 0 __________________________________
0x0000000000030000 51100 199 ________________TG________________ compound_tail,huge
0x0000000000028018 100 0 ___UD__________H_G________________ uptodate,dirty,compound_head,huge
0x0000000000000804 1 0 __R________M______________________ referenced,mmap
0x000000000000080c 1 0 __RU_______M______________________ referenced,uptodate,mmap
0x000000000000086c 80 0 __RU_lA____M______________________ referenced,uptodate,lru,active,mmap
0x0000000000005808 4 0 ___U_______Ma_b___________________ uptodate,mmap,anonymous,swapbacked
0x0000000000005868 12 0 ___U_lA____Ma_b___________________ uptodate,lru,active,mmap,anonymous,swapbacked
0x000000000000586c 1 0 __RU_lA____Ma_b___________________ referenced,uptodate,lru,active,mmap,anonymous,swapbacked
total 51300 200
The output of page-types shows 51200 pages contributing to hugepages,
containing 100 head pages and 51100 tail pages as expected.
[akpm@linux-foundation.org: build fix]
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/proc/task_mmu.c | 45 | ||||
-rw-r--r-- | include/linux/mm.h | 3 | ||||
-rw-r--r-- | mm/pagewalk.c | 22 |
3 files changed, 67 insertions, 3 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2a1bef9203c6..47c03f4336b8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -650,6 +650,50 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
650 | return err; | 650 | return err; |
651 | } | 651 | } |
652 | 652 | ||
653 | static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset) | ||
654 | { | ||
655 | u64 pme = 0; | ||
656 | if (pte_present(pte)) | ||
657 | pme = PM_PFRAME(pte_pfn(pte) + offset) | ||
658 | | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT; | ||
659 | return pme; | ||
660 | } | ||
661 | |||
662 | static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr, | ||
663 | unsigned long end, struct mm_walk *walk) | ||
664 | { | ||
665 | struct vm_area_struct *vma; | ||
666 | struct pagemapread *pm = walk->private; | ||
667 | struct hstate *hs = NULL; | ||
668 | int err = 0; | ||
669 | |||
670 | vma = find_vma(walk->mm, addr); | ||
671 | if (vma) | ||
672 | hs = hstate_vma(vma); | ||
673 | for (; addr != end; addr += PAGE_SIZE) { | ||
674 | u64 pfn = PM_NOT_PRESENT; | ||
675 | |||
676 | if (vma && (addr >= vma->vm_end)) { | ||
677 | vma = find_vma(walk->mm, addr); | ||
678 | if (vma) | ||
679 | hs = hstate_vma(vma); | ||
680 | } | ||
681 | |||
682 | if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) { | ||
683 | /* calculate pfn of the "raw" page in the hugepage. */ | ||
684 | int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT; | ||
685 | pfn = huge_pte_to_pagemap_entry(*pte, offset); | ||
686 | } | ||
687 | err = add_to_pagemap(addr, pfn, pm); | ||
688 | if (err) | ||
689 | return err; | ||
690 | } | ||
691 | |||
692 | cond_resched(); | ||
693 | |||
694 | return err; | ||
695 | } | ||
696 | |||
653 | /* | 697 | /* |
654 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | 698 | * /proc/pid/pagemap - an array mapping virtual pages to pfns |
655 | * | 699 | * |
@@ -742,6 +786,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
742 | 786 | ||
743 | pagemap_walk.pmd_entry = pagemap_pte_range; | 787 | pagemap_walk.pmd_entry = pagemap_pte_range; |
744 | pagemap_walk.pte_hole = pagemap_pte_hole; | 788 | pagemap_walk.pte_hole = pagemap_pte_hole; |
789 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; | ||
745 | pagemap_walk.mm = mm; | 790 | pagemap_walk.mm = mm; |
747 | pagemap_walk.private = &pm; | 791 | pagemap_walk.private = &pm; |
747 | 792 | ||
diff --git a/include/linux/mm.h b/include/linux/mm.h index 52b264563cd9..9d65ae4ba0e0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -770,6 +770,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlb, | |||
770 | * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry | 770 | * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry |
771 | * @pte_entry: if set, called for each non-empty PTE (4th-level) entry | 771 | * @pte_entry: if set, called for each non-empty PTE (4th-level) entry |
772 | * @pte_hole: if set, called for each hole at all levels | 772 | * @pte_hole: if set, called for each hole at all levels |
773 | * @hugetlb_entry: if set, called for each hugetlb entry | ||
773 | * | 774 | * |
774 | * (see walk_page_range for more details) | 775 | * (see walk_page_range for more details) |
775 | */ | 776 | */ |
@@ -779,6 +780,8 @@ struct mm_walk { | |||
779 | int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *); | 780 | int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *); |
780 | int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *); | 781 | int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *); |
781 | int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *); | 782 | int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *); |
783 | int (*hugetlb_entry)(pte_t *, unsigned long, unsigned long, | ||
784 | struct mm_walk *); | ||
782 | struct mm_struct *mm; | 785 | struct mm_struct *mm; |
783 | void *private; | 786 | void *private; |
784 | }; | 787 | }; |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index a286915e23ef..7b47a57b6646 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -120,15 +120,31 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
120 | do { | 120 | do { |
121 | next = pgd_addr_end(addr, end); | 121 | next = pgd_addr_end(addr, end); |
122 | 122 | ||
123 | /* skip hugetlb vma to avoid hugepage PMD being cleared | 123 | /* |
124 | * in pmd_none_or_clear_bad(). */ | 124 | * handle hugetlb vma individually because pagetable walk for |
125 | * the hugetlb page is dependent on the architecture and | ||
127 | * we can't handle it in the same manner as non-huge pages. | ||
127 | */ | ||
125 | vma = find_vma(walk->mm, addr); | 128 | vma = find_vma(walk->mm, addr); |
129 | #ifdef CONFIG_HUGETLB_PAGE | ||
126 | if (vma && is_vm_hugetlb_page(vma)) { | 130 | if (vma && is_vm_hugetlb_page(vma)) { |
131 | pte_t *pte; | ||
132 | struct hstate *hs; | ||
133 | |||
127 | if (vma->vm_end < next) | 134 | if (vma->vm_end < next) |
128 | next = vma->vm_end; | 135 | next = vma->vm_end; |
136 | hs = hstate_vma(vma); | ||
137 | pte = huge_pte_offset(walk->mm, | ||
138 | addr & huge_page_mask(hs)); | ||
139 | if (pte && !huge_pte_none(huge_ptep_get(pte)) | ||
140 | && walk->hugetlb_entry) | ||
141 | err = walk->hugetlb_entry(pte, addr, | ||
142 | next, walk); | ||
143 | if (err) | ||
144 | break; | ||
129 | continue; | 145 | continue; |
130 | } | 146 | } |
131 | 147 | #endif | |
132 | if (pgd_none_or_clear_bad(pgd)) { | 148 | if (pgd_none_or_clear_bad(pgd)) { |
133 | if (walk->pte_hole) | 149 | if (walk->pte_hole) |
134 | err = walk->pte_hole(addr, next, walk); | 150 | err = walk->pte_hole(addr, next, walk); |