author     Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>     2010-04-06 17:35:04 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2010-04-07 11:38:04 -0400
commit     116354d177ba2da37e91cf884e3d11e67f825efd
tree       150227e8a5c9ce8b6dd9333593ed513c8c012686
parent     57119c34e53bbb8d244c3ff1335ef5145768538f
pagemap: fix pfn calculation for hugepage

When we look into pagemap using page-types with option -p, the value of
pfn for hugepages looks wrong (see below).  This is because the pte was
evaluated only once per vma, although it should be re-evaluated for each
hugepage.  This patch fixes it.

$ page-types -p 3277 -Nl -b huge
voffset         offset  len     flags
7f21e8a00       11e400  1       ___U___________H_G________________
7f21e8a01       11e401  1ff     ________________TG________________
                ^^^
7f21e8c00       11e400  1       ___U___________H_G________________
7f21e8c01       11e401  1ff     ________________TG________________
                ^^^

One hugepage contains 1 head page and 511 tail pages on x86_64, and each
pair of lines represents one hugepage.  Voffset and offset are the
virtual and physical addresses in page units, respectively.  Different
hugepages should not have the same offset value.

With this patch applied:

$ page-types -p 3386 -Nl -b huge
voffset         offset  len     flags
7fec7a600       112c00  1       ___UD__________H_G________________
7fec7a601       112c01  1ff     ________________TG________________
                ^^^
7fec7a800       113200  1       ___UD__________H_G________________
7fec7a801       113201  1ff     ________________TG________________
                ^^^
OK

More info:

- This patch modifies walk_page_range()'s hugepage walker, but the
  change only affects pagemap_read(), which is the only caller of the
  hugepage callback.

- Without this patch, the hugetlb_entry() callback is called once per
  vma, which does not match the natural expectation from its name.

- With this patch, hugetlb_entry() is called once per hugepte entry and
  the callback can become much simpler.

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
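For intuition, a minimal user-space sketch of the fixed calculation (an
illustration, not the kernel code: 2MB x86_64 hugepages are assumed, and
the head pfns are taken from the corrected log above).  Re-reading the
head pfn for every hugetlb entry, then adding each raw page's offset
within its hugepage, reproduces the distinct per-hugepage offsets:

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define HPAGE_SHIFT     21                      /* 2MB hugepages on x86_64 */
#define HPAGE_SIZE      (1UL << HPAGE_SHIFT)

int main(void)
{
        /* head pfns of two adjacent hugepages, from the fixed log above */
        unsigned long head_pfn[2] = { 0x112c00, 0x113200 };
        unsigned long hmask = ~(HPAGE_SIZE - 1);        /* huge_page_mask() */
        unsigned long start = 0x7fec7a600000UL;
        unsigned long addr, end = start + 2 * HPAGE_SIZE;

        for (addr = start; addr != end; addr += PAGE_SIZE) {
                /* fixed behaviour: the pte is refreshed per hugetlb entry */
                unsigned long head = head_pfn[(addr - start) >> HPAGE_SHIFT];
                int offset = (addr & ~hmask) >> PAGE_SHIFT;

                if (offset <= 1)        /* head page and first tail page */
                        printf("voffset %lx  offset %lx\n",
                               addr >> PAGE_SHIFT, head + offset);
        }
        return 0;
}

This prints the four voffset/offset pairs shown in the fixed output.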
-rw-r--r--   fs/proc/task_mmu.c   27
-rw-r--r--   include/linux/mm.h    4
-rw-r--r--   mm/pagewalk.c        47
3 files changed, 46 insertions, 32 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a05a669510a4..070553427dd5 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -662,31 +662,18 @@ static u64 huge_pte_to_pagemap_entry(pte_t pte, int offset)
 	return pme;
 }
 
-static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr,
-				 unsigned long end, struct mm_walk *walk)
+/* This function walks within one hugetlb entry in the single call */
+static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
+				 unsigned long addr, unsigned long end,
+				 struct mm_walk *walk)
 {
-	struct vm_area_struct *vma;
 	struct pagemapread *pm = walk->private;
-	struct hstate *hs = NULL;
 	int err = 0;
+	u64 pfn;
 
-	vma = find_vma(walk->mm, addr);
-	if (vma)
-		hs = hstate_vma(vma);
 	for (; addr != end; addr += PAGE_SIZE) {
-		u64 pfn = PM_NOT_PRESENT;
-
-		if (vma && (addr >= vma->vm_end)) {
-			vma = find_vma(walk->mm, addr);
-			if (vma)
-				hs = hstate_vma(vma);
-		}
-
-		if (vma && (vma->vm_start <= addr) && is_vm_hugetlb_page(vma)) {
-			/* calculate pfn of the "raw" page in the hugepage. */
-			int offset = (addr & ~huge_page_mask(hs)) >> PAGE_SHIFT;
-			pfn = huge_pte_to_pagemap_entry(*pte, offset);
-		}
+		int offset = (addr & ~hmask) >> PAGE_SHIFT;
+		pfn = huge_pte_to_pagemap_entry(*pte, offset);
 		err = add_to_pagemap(addr, pfn, pm);
 		if (err)
 			return err;
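As a rough sketch of the simplification, here is a user-space stand-in
for a callback written against the new per-entry contract (pte_t,
struct mm_walk and the pfn encoding are all mocked; none of this is
kernel API).  The callback no longer needs find_vma()/hstate_vma()
bookkeeping, because the walker now hands it the hugepage mask and a
range confined to a single hugetlb entry:

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)

typedef unsigned long pte_t;            /* stand-in for the kernel type */
struct mm_walk { void *private; };      /* stand-in for the kernel struct */

/* shaped like the fixed pagemap_hugetlb_range(): one hugetlb entry per call */
static int dump_hugetlb_range(pte_t *pte, unsigned long hmask,
                              unsigned long addr, unsigned long end,
                              struct mm_walk *walk)
{
        unsigned long head_pfn = *pte;  /* pretend the pte encodes the pfn */

        for (; addr != end; addr += PAGE_SIZE) {
                int offset = (addr & ~hmask) >> PAGE_SHIFT;
                printf("%lx -> %lx\n", addr >> PAGE_SHIFT, head_pfn + offset);
        }
        return 0;
}

int main(void)
{
        struct mm_walk walk = { 0 };
        pte_t pte = 0x112c00;           /* head pfn from the log above */
        unsigned long hmask = ~((1UL << 21) - 1);
        unsigned long addr = 0x7fec7a600000UL;

        /* one call covers one 2MB hugetlb entry, i.e. 512 raw pages */
        return dump_hugetlb_range(&pte, hmask, addr, addr + (1UL << 21),
                                  &walk);
}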
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e70f21beb4b4..462acaf36f3a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -783,8 +783,8 @@ struct mm_walk {
 	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
 	int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
 	int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
-	int (*hugetlb_entry)(pte_t *, unsigned long, unsigned long,
-			     struct mm_walk *);
+	int (*hugetlb_entry)(pte_t *, unsigned long,
+			     unsigned long, unsigned long, struct mm_walk *);
 	struct mm_struct *mm;
 	void *private;
 };
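The prototype change in isolation, as a compilable C sketch (pte_t and
struct mm_walk are stand-ins here): the hugepage mask is inserted before
the address range rather than appended, so every hugetlb_entry
implementation has to be converted in the same patch:

typedef unsigned long pte_t;            /* stand-in for the kernel type */
struct mm_walk;

/* old contract: called once per hugetlb vma */
typedef int old_hugetlb_entry_t(pte_t *pte, unsigned long addr,
                                unsigned long end, struct mm_walk *walk);

/* new contract: called once per hugetlb entry, with the hugepage mask */
typedef int new_hugetlb_entry_t(pte_t *pte, unsigned long hmask,
                                unsigned long addr, unsigned long end,
                                struct mm_walk *walk);

In-tree, pagemap_hugetlb_range() is the only implementation, which is
why fs/proc/task_mmu.c is the only other file that needs updating.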
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 7b47a57b6646..8b1a2ce21ee5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -80,6 +80,37 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
+				       unsigned long end)
+{
+	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
+	return boundary < end ? boundary : end;
+}
+
+static int walk_hugetlb_range(struct vm_area_struct *vma,
+			      unsigned long addr, unsigned long end,
+			      struct mm_walk *walk)
+{
+	struct hstate *h = hstate_vma(vma);
+	unsigned long next;
+	unsigned long hmask = huge_page_mask(h);
+	pte_t *pte;
+	int err = 0;
+
+	do {
+		next = hugetlb_entry_end(h, addr, end);
+		pte = huge_pte_offset(walk->mm, addr & hmask);
+		if (pte && walk->hugetlb_entry)
+			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+		if (err)
+			return err;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+#endif
+
 /**
  * walk_page_range - walk a memory map's page tables with a callback
  * @mm: memory map to walk
@@ -128,20 +159,16 @@ int walk_page_range(unsigned long addr, unsigned long end,
 		vma = find_vma(walk->mm, addr);
 #ifdef CONFIG_HUGETLB_PAGE
 		if (vma && is_vm_hugetlb_page(vma)) {
-			pte_t *pte;
-			struct hstate *hs;
-
 			if (vma->vm_end < next)
 				next = vma->vm_end;
-			hs = hstate_vma(vma);
-			pte = huge_pte_offset(walk->mm,
-					      addr & huge_page_mask(hs));
-			if (pte && !huge_pte_none(huge_ptep_get(pte))
-			    && walk->hugetlb_entry)
-				err = walk->hugetlb_entry(pte, addr,
-							  next, walk);
+			/*
+			 * Hugepage is very tightly coupled with vma, so
+			 * walk through hugetlb entries within a given vma.
+			 */
+			err = walk_hugetlb_range(vma, addr, next, walk);
 			if (err)
 				break;
+			pgd = pgd_offset(walk->mm, next);
 			continue;
 		}
 #endif
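A quick user-space check of the hugetlb_entry_end() boundary math (2MB
hugepages assumed, the struct hstate argument replaced by constants,
addresses illustrative): each iteration of the do/while covers exactly
one hugepage, and the last one is clamped to end:

#include <stdio.h>

#define HPAGE_SIZE      (1UL << 21)             /* 2MB hugepages */
#define HPAGE_MASK      (~(HPAGE_SIZE - 1))     /* huge_page_mask() */

static unsigned long hugetlb_entry_end(unsigned long addr, unsigned long end)
{
        unsigned long boundary = (addr & HPAGE_MASK) + HPAGE_SIZE;
        return boundary < end ? boundary : end;
}

int main(void)
{
        unsigned long addr = 0x7fec7a600000UL;
        unsigned long end = addr + 2 * HPAGE_SIZE + 0x1000; /* ragged end */

        /* mirrors the do/while loop in walk_hugetlb_range() */
        do {
                unsigned long next = hugetlb_entry_end(addr, end);
                printf("entry: %lx..%lx\n", addr, next);
                addr = next;
        } while (addr != end);
        return 0;
}

Note that the walker also tolerates a pte-less hole: huge_pte_offset()
may return NULL, in which case the callback is simply skipped for that
entry and the loop advances to the next hugepage boundary.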