aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dave Hansen <dave@linux.vnet.ibm.com> 2008-06-12 18:21:47 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-06-12 21:05:41 -0400
commit 2165009bdf63f79716a36ad545df14c3cdf958b7 (patch)
tree 83d1735f2104b6b5158be56a362856ac1079861d
parent cfc53f65f56f9f33c0cf522124045ac5a64076b3 (diff)
pagemap: pass mm into pagewalkers
We need this at least for huge page detection for now, because powerpc needs the vm_area_struct to be able to determine whether a virtual address is referring to a huge page (its pmd_huge() doesn't work).

It might also come in handy for some of the other users.

Signed-off-by: Dave Hansen <dave@linux.vnet.ibm.com>
Acked-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/proc/task_mmu.c 44
-rw-r--r-- include/linux/mm.h 17
-rw-r--r-- mm/pagewalk.c 42
3 files changed, 56 insertions, 47 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 17403629e330..f0df3109343d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -315,9 +315,9 @@ struct mem_size_stats {
315}; 315};
316 316
317static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 317static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
318 void *private) 318 struct mm_walk *walk)
319{ 319{
320 struct mem_size_stats *mss = private; 320 struct mem_size_stats *mss = walk->private;
321 struct vm_area_struct *vma = mss->vma; 321 struct vm_area_struct *vma = mss->vma;
322 pte_t *pte, ptent; 322 pte_t *pte, ptent;
323 spinlock_t *ptl; 323 spinlock_t *ptl;
@@ -365,19 +365,21 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
365 return 0; 365 return 0;
366} 366}
367 367
368static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
369
370static int show_smap(struct seq_file *m, void *v) 368static int show_smap(struct seq_file *m, void *v)
371{ 369{
372 struct vm_area_struct *vma = v; 370 struct vm_area_struct *vma = v;
373 struct mem_size_stats mss; 371 struct mem_size_stats mss;
374 int ret; 372 int ret;
373 struct mm_walk smaps_walk = {
374 .pmd_entry = smaps_pte_range,
375 .mm = vma->vm_mm,
376 .private = &mss,
377 };
375 378
376 memset(&mss, 0, sizeof mss); 379 memset(&mss, 0, sizeof mss);
377 mss.vma = vma; 380 mss.vma = vma;
378 if (vma->vm_mm && !is_vm_hugetlb_page(vma)) 381 if (vma->vm_mm && !is_vm_hugetlb_page(vma))
379 walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, 382 walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
380 &smaps_walk, &mss);
381 383
382 ret = show_map(m, v); 384 ret = show_map(m, v);
383 if (ret) 385 if (ret)
@@ -426,9 +428,9 @@ const struct file_operations proc_smaps_operations = {
426}; 428};
427 429
428static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, 430static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
429 unsigned long end, void *private) 431 unsigned long end, struct mm_walk *walk)
430{ 432{
431 struct vm_area_struct *vma = private; 433 struct vm_area_struct *vma = walk->private;
432 pte_t *pte, ptent; 434 pte_t *pte, ptent;
433 spinlock_t *ptl; 435 spinlock_t *ptl;
434 struct page *page; 436 struct page *page;
@@ -452,8 +454,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
452 return 0; 454 return 0;
453} 455}
454 456
455static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
456
457static ssize_t clear_refs_write(struct file *file, const char __user *buf, 457static ssize_t clear_refs_write(struct file *file, const char __user *buf,
458 size_t count, loff_t *ppos) 458 size_t count, loff_t *ppos)
459{ 459{
@@ -476,11 +476,17 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
476 return -ESRCH; 476 return -ESRCH;
477 mm = get_task_mm(task); 477 mm = get_task_mm(task);
478 if (mm) { 478 if (mm) {
479 static struct mm_walk clear_refs_walk;
480 memset(&clear_refs_walk, 0, sizeof(clear_refs_walk));
481 clear_refs_walk.pmd_entry = clear_refs_pte_range;
482 clear_refs_walk.mm = mm;
479 down_read(&mm->mmap_sem); 483 down_read(&mm->mmap_sem);
480 for (vma = mm->mmap; vma; vma = vma->vm_next) 484 for (vma = mm->mmap; vma; vma = vma->vm_next) {
485 clear_refs_walk.private = vma;
481 if (!is_vm_hugetlb_page(vma)) 486 if (!is_vm_hugetlb_page(vma))
482 walk_page_range(mm, vma->vm_start, vma->vm_end, 487 walk_page_range(vma->vm_start, vma->vm_end,
483 &clear_refs_walk, vma); 488 &clear_refs_walk);
489 }
484 flush_tlb_mm(mm); 490 flush_tlb_mm(mm);
485 up_read(&mm->mmap_sem); 491 up_read(&mm->mmap_sem);
486 mmput(mm); 492 mmput(mm);
@@ -528,9 +534,9 @@ static int add_to_pagemap(unsigned long addr, u64 pfn,
528} 534}
529 535
530static int pagemap_pte_hole(unsigned long start, unsigned long end, 536static int pagemap_pte_hole(unsigned long start, unsigned long end,
531 void *private) 537 struct mm_walk *walk)
532{ 538{
533 struct pagemapread *pm = private; 539 struct pagemapread *pm = walk->private;
534 unsigned long addr; 540 unsigned long addr;
535 int err = 0; 541 int err = 0;
536 for (addr = start; addr < end; addr += PAGE_SIZE) { 542 for (addr = start; addr < end; addr += PAGE_SIZE) {
@@ -548,9 +554,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte)
548} 554}
549 555
550static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 556static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
551 void *private) 557 struct mm_walk *walk)
552{ 558{
553 struct pagemapread *pm = private; 559 struct pagemapread *pm = walk->private;
554 pte_t *pte; 560 pte_t *pte;
555 int err = 0; 561 int err = 0;
556 562
@@ -675,8 +681,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
675 * user buffer is tracked in "pm", and the walk 681 * user buffer is tracked in "pm", and the walk
676 * will stop when we hit the end of the buffer. 682 * will stop when we hit the end of the buffer.
677 */ 683 */
678 ret = walk_page_range(mm, start_vaddr, end_vaddr, 684 ret = walk_page_range(start_vaddr, end_vaddr,
679 &pagemap_walk, &pm); 685 &pagemap_walk);
680 if (ret == PM_END_OF_BUFFER) 686 if (ret == PM_END_OF_BUFFER)
681 ret = 0; 687 ret = 0;
682 /* don't need mmap_sem for these, but this looks cleaner */ 688 /* don't need mmap_sem for these, but this looks cleaner */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c31a9cd2a30e..586a943cab01 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -760,16 +760,17 @@ unsigned long unmap_vmas(struct mmu_gather **tlb,
760 * (see walk_page_range for more details) 760 * (see walk_page_range for more details)
761 */ 761 */
762struct mm_walk { 762struct mm_walk {
763 int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *); 763 int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, struct mm_walk *);
764 int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *); 764 int (*pud_entry)(pud_t *, unsigned long, unsigned long, struct mm_walk *);
765 int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *); 765 int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, struct mm_walk *);
766 int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *); 766 int (*pte_entry)(pte_t *, unsigned long, unsigned long, struct mm_walk *);
767 int (*pte_hole)(unsigned long, unsigned long, void *); 767 int (*pte_hole)(unsigned long, unsigned long, struct mm_walk *);
768 struct mm_struct *mm;
769 void *private;
768}; 770};
769 771
770int walk_page_range(const struct mm_struct *, unsigned long addr, 772int walk_page_range(unsigned long addr, unsigned long end,
771 unsigned long end, const struct mm_walk *walk, 773 struct mm_walk *walk);
772 void *private);
773void free_pgd_range(struct mmu_gather **tlb, unsigned long addr, 774void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
774 unsigned long end, unsigned long floor, unsigned long ceiling); 775 unsigned long end, unsigned long floor, unsigned long ceiling);
775void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma, 776void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 0afd2387e507..d5878bed7841 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -3,14 +3,14 @@
3#include <linux/sched.h> 3#include <linux/sched.h>
4 4
5static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 5static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
6 const struct mm_walk *walk, void *private) 6 struct mm_walk *walk)
7{ 7{
8 pte_t *pte; 8 pte_t *pte;
9 int err = 0; 9 int err = 0;
10 10
11 pte = pte_offset_map(pmd, addr); 11 pte = pte_offset_map(pmd, addr);
12 for (;;) { 12 for (;;) {
13 err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private); 13 err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
14 if (err) 14 if (err)
15 break; 15 break;
16 addr += PAGE_SIZE; 16 addr += PAGE_SIZE;
@@ -24,7 +24,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
24} 24}
25 25
26static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, 26static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
27 const struct mm_walk *walk, void *private) 27 struct mm_walk *walk)
28{ 28{
29 pmd_t *pmd; 29 pmd_t *pmd;
30 unsigned long next; 30 unsigned long next;
@@ -35,15 +35,15 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
35 next = pmd_addr_end(addr, end); 35 next = pmd_addr_end(addr, end);
36 if (pmd_none_or_clear_bad(pmd)) { 36 if (pmd_none_or_clear_bad(pmd)) {
37 if (walk->pte_hole) 37 if (walk->pte_hole)
38 err = walk->pte_hole(addr, next, private); 38 err = walk->pte_hole(addr, next, walk);
39 if (err) 39 if (err)
40 break; 40 break;
41 continue; 41 continue;
42 } 42 }
43 if (walk->pmd_entry) 43 if (walk->pmd_entry)
44 err = walk->pmd_entry(pmd, addr, next, private); 44 err = walk->pmd_entry(pmd, addr, next, walk);
45 if (!err && walk->pte_entry) 45 if (!err && walk->pte_entry)
46 err = walk_pte_range(pmd, addr, next, walk, private); 46 err = walk_pte_range(pmd, addr, next, walk);
47 if (err) 47 if (err)
48 break; 48 break;
49 } while (pmd++, addr = next, addr != end); 49 } while (pmd++, addr = next, addr != end);
@@ -52,7 +52,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
52} 52}
53 53
54static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, 54static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
55 const struct mm_walk *walk, void *private) 55 struct mm_walk *walk)
56{ 56{
57 pud_t *pud; 57 pud_t *pud;
58 unsigned long next; 58 unsigned long next;
@@ -63,15 +63,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
63 next = pud_addr_end(addr, end); 63 next = pud_addr_end(addr, end);
64 if (pud_none_or_clear_bad(pud)) { 64 if (pud_none_or_clear_bad(pud)) {
65 if (walk->pte_hole) 65 if (walk->pte_hole)
66 err = walk->pte_hole(addr, next, private); 66 err = walk->pte_hole(addr, next, walk);
67 if (err) 67 if (err)
68 break; 68 break;
69 continue; 69 continue;
70 } 70 }
71 if (walk->pud_entry) 71 if (walk->pud_entry)
72 err = walk->pud_entry(pud, addr, next, private); 72 err = walk->pud_entry(pud, addr, next, walk);
73 if (!err && (walk->pmd_entry || walk->pte_entry)) 73 if (!err && (walk->pmd_entry || walk->pte_entry))
74 err = walk_pmd_range(pud, addr, next, walk, private); 74 err = walk_pmd_range(pud, addr, next, walk);
75 if (err) 75 if (err)
76 break; 76 break;
77 } while (pud++, addr = next, addr != end); 77 } while (pud++, addr = next, addr != end);
@@ -85,15 +85,15 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
85 * @addr: starting address 85 * @addr: starting address
86 * @end: ending address 86 * @end: ending address
87 * @walk: set of callbacks to invoke for each level of the tree 87 * @walk: set of callbacks to invoke for each level of the tree
88 * @private: private data passed to the callback function
89 * 88 *
90 * Recursively walk the page table for the memory area in a VMA, 89 * Recursively walk the page table for the memory area in a VMA,
91 * calling supplied callbacks. Callbacks are called in-order (first 90 * calling supplied callbacks. Callbacks are called in-order (first
92 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD, 91 * PGD, first PUD, first PMD, first PTE, second PTE... second PMD,
93 * etc.). If lower-level callbacks are omitted, walking depth is reduced. 92 * etc.). If lower-level callbacks are omitted, walking depth is reduced.
94 * 93 *
95 * Each callback receives an entry pointer, the start and end of the 94 * Each callback receives an entry pointer and the start and end of the
96 * associated range, and a caller-supplied private data pointer. 95 * associated range, and a copy of the original mm_walk for access to
96 * the ->private or ->mm fields.
97 * 97 *
98 * No locks are taken, but the bottom level iterator will map PTE 98 * No locks are taken, but the bottom level iterator will map PTE
99 * directories from highmem if necessary. 99 * directories from highmem if necessary.
@@ -101,9 +101,8 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
101 * If any callback returns a non-zero value, the walk is aborted and 101 * If any callback returns a non-zero value, the walk is aborted and
102 * the return value is propagated back to the caller. Otherwise 0 is returned. 102 * the return value is propagated back to the caller. Otherwise 0 is returned.
103 */ 103 */
104int walk_page_range(const struct mm_struct *mm, 104int walk_page_range(unsigned long addr, unsigned long end,
105 unsigned long addr, unsigned long end, 105 struct mm_walk *walk)
106 const struct mm_walk *walk, void *private)
107{ 106{
108 pgd_t *pgd; 107 pgd_t *pgd;
109 unsigned long next; 108 unsigned long next;
@@ -112,21 +111,24 @@ int walk_page_range(const struct mm_struct *mm,
112 if (addr >= end) 111 if (addr >= end)
113 return err; 112 return err;
114 113
115 pgd = pgd_offset(mm, addr); 114 if (!walk->mm)
115 return -EINVAL;
116
117 pgd = pgd_offset(walk->mm, addr);
116 do { 118 do {
117 next = pgd_addr_end(addr, end); 119 next = pgd_addr_end(addr, end);
118 if (pgd_none_or_clear_bad(pgd)) { 120 if (pgd_none_or_clear_bad(pgd)) {
119 if (walk->pte_hole) 121 if (walk->pte_hole)
120 err = walk->pte_hole(addr, next, private); 122 err = walk->pte_hole(addr, next, walk);
121 if (err) 123 if (err)
122 break; 124 break;
123 continue; 125 continue;
124 } 126 }
125 if (walk->pgd_entry) 127 if (walk->pgd_entry)
126 err = walk->pgd_entry(pgd, addr, next, private); 128 err = walk->pgd_entry(pgd, addr, next, walk);
127 if (!err && 129 if (!err &&
128 (walk->pud_entry || walk->pmd_entry || walk->pte_entry)) 130 (walk->pud_entry || walk->pmd_entry || walk->pte_entry))
129 err = walk_pud_range(pgd, addr, next, walk, private); 131 err = walk_pud_range(pgd, addr, next, walk);
130 if (err) 132 if (err)
131 break; 133 break;
132 } while (pgd++, addr = next, addr != end); 134 } while (pgd++, addr = next, addr != end);