Diffstat (limited to 'mm/mincore.c')
-rw-r--r--  mm/mincore.c  263
1 files changed, 151 insertions, 112 deletions
diff --git a/mm/mincore.c b/mm/mincore.c
index f77433c20279..9ac42dc6d7b6 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -19,6 +19,40 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
+static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end,
+				unsigned char *vec)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	struct hstate *h;
+
+	h = hstate_vma(vma);
+	while (1) {
+		unsigned char present;
+		pte_t *ptep;
+		/*
+		 * Huge pages are always in RAM for now, but
+		 * theoretically it needs to be checked.
+		 */
+		ptep = huge_pte_offset(current->mm,
+				       addr & huge_page_mask(h));
+		present = ptep && !huge_pte_none(huge_ptep_get(ptep));
+		while (1) {
+			*vec = present;
+			vec++;
+			addr += PAGE_SIZE;
+			if (addr == end)
+				return;
+			/* check hugepage border */
+			if (!(addr & ~huge_page_mask(h)))
+				break;
+		}
+	}
+#else
+	BUG();
+#endif
+}
+
 /*
  * Later we can get more picky about what "in core" means precisely.
  * For now, simply check to see if the page is in the page cache,
@@ -49,145 +83,150 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 	return present;
 }
 
-/*
- * Do a chunk of "sys_mincore()". We've already checked
- * all the arguments, we hold the mmap semaphore: we should
- * just return the amount of info we're asked for.
- */
-static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
+static void mincore_unmapped_range(struct vm_area_struct *vma,
+				unsigned long addr, unsigned long end,
+				unsigned char *vec)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *ptep;
-	spinlock_t *ptl;
-	unsigned long nr;
+	unsigned long nr = (end - addr) >> PAGE_SHIFT;
 	int i;
-	pgoff_t pgoff;
-	struct vm_area_struct *vma = find_vma(current->mm, addr);
 
-	/*
-	 * find_vma() didn't find anything above us, or we're
-	 * in an unmapped hole in the address space: ENOMEM.
-	 */
-	if (!vma || addr < vma->vm_start)
-		return -ENOMEM;
-
-#ifdef CONFIG_HUGETLB_PAGE
-	if (is_vm_hugetlb_page(vma)) {
-		struct hstate *h;
-		unsigned long nr_huge;
-		unsigned char present;
+	if (vma->vm_file) {
+		pgoff_t pgoff;
 
-		i = 0;
-		nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT);
-		h = hstate_vma(vma);
-		nr_huge = ((addr + pages * PAGE_SIZE - 1) >> huge_page_shift(h))
-			- (addr >> huge_page_shift(h)) + 1;
-		nr_huge = min(nr_huge,
-			(vma->vm_end - addr) >> huge_page_shift(h));
-		while (1) {
-			/* hugepage always in RAM for now,
-			 * but generally it needs to be check */
-			ptep = huge_pte_offset(current->mm,
-				addr & huge_page_mask(h));
-			present = !!(ptep &&
-				!huge_pte_none(huge_ptep_get(ptep)));
-			while (1) {
-				vec[i++] = present;
-				addr += PAGE_SIZE;
-				/* reach buffer limit */
-				if (i == nr)
-					return nr;
-				/* check hugepage border */
-				if (!((addr & ~huge_page_mask(h))
-					>> PAGE_SHIFT))
-					break;
-			}
-		}
-		return nr;
+		pgoff = linear_page_index(vma, addr);
+		for (i = 0; i < nr; i++, pgoff++)
+			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
+	} else {
+		for (i = 0; i < nr; i++)
+			vec[i] = 0;
 	}
-#endif
-
-	/*
-	 * Calculate how many pages there are left in the last level of the
-	 * PTE array for our address.
-	 */
-	nr = PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1));
-
-	/*
-	 * Don't overrun this vma
-	 */
-	nr = min(nr, (vma->vm_end - addr) >> PAGE_SHIFT);
-
-	/*
-	 * Don't return more than the caller asked for
-	 */
-	nr = min(nr, pages);
+}
 
-	pgd = pgd_offset(vma->vm_mm, addr);
-	if (pgd_none_or_clear_bad(pgd))
-		goto none_mapped;
-	pud = pud_offset(pgd, addr);
-	if (pud_none_or_clear_bad(pud))
-		goto none_mapped;
-	pmd = pmd_offset(pud, addr);
-	if (pmd_none_or_clear_bad(pmd))
-		goto none_mapped;
+static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+			unsigned long addr, unsigned long end,
+			unsigned char *vec)
+{
+	unsigned long next;
+	spinlock_t *ptl;
+	pte_t *ptep;
 
 	ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
-		unsigned char present;
+	do {
 		pte_t pte = *ptep;
+		pgoff_t pgoff;
 
-		if (pte_present(pte)) {
-			present = 1;
-
-		} else if (pte_none(pte)) {
-			if (vma->vm_file) {
-				pgoff = linear_page_index(vma, addr);
-				present = mincore_page(vma->vm_file->f_mapping,
-						pgoff);
-			} else
-				present = 0;
-
-		} else if (pte_file(pte)) {
+		next = addr + PAGE_SIZE;
+		if (pte_none(pte))
+			mincore_unmapped_range(vma, addr, next, vec);
+		else if (pte_present(pte))
+			*vec = 1;
+		else if (pte_file(pte)) {
 			pgoff = pte_to_pgoff(pte);
-			present = mincore_page(vma->vm_file->f_mapping, pgoff);
-
+			*vec = mincore_page(vma->vm_file->f_mapping, pgoff);
 		} else { /* pte is a swap entry */
 			swp_entry_t entry = pte_to_swp_entry(pte);
+
 			if (is_migration_entry(entry)) {
 				/* migration entries are always uptodate */
-				present = 1;
+				*vec = 1;
 			} else {
 #ifdef CONFIG_SWAP
 				pgoff = entry.val;
-				present = mincore_page(&swapper_space, pgoff);
+				*vec = mincore_page(&swapper_space, pgoff);
 #else
 				WARN_ON(1);
-				present = 1;
+				*vec = 1;
 #endif
 			}
 		}
+		vec++;
+	} while (ptep++, addr = next, addr != end);
+	pte_unmap_unlock(ptep - 1, ptl);
+}
 
-		vec[i] = present;
-	}
-	pte_unmap_unlock(ptep-1, ptl);
+static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+			unsigned long addr, unsigned long end,
+			unsigned char *vec)
+{
+	unsigned long next;
+	pmd_t *pmd;
 
-	return nr;
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			mincore_unmapped_range(vma, addr, next, vec);
+		else
+			mincore_pte_range(vma, pmd, addr, next, vec);
+		vec += (next - addr) >> PAGE_SHIFT;
+	} while (pmd++, addr = next, addr != end);
+}
 
-none_mapped:
-	if (vma->vm_file) {
-		pgoff = linear_page_index(vma, addr);
-		for (i = 0; i < nr; i++, pgoff++)
-			vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
-	} else {
-		for (i = 0; i < nr; i++)
-			vec[i] = 0;
+static void mincore_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+			unsigned long addr, unsigned long end,
+			unsigned char *vec)
+{
+	unsigned long next;
+	pud_t *pud;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			mincore_unmapped_range(vma, addr, next, vec);
+		else
+			mincore_pmd_range(vma, pud, addr, next, vec);
+		vec += (next - addr) >> PAGE_SHIFT;
+	} while (pud++, addr = next, addr != end);
+}
+
+static void mincore_page_range(struct vm_area_struct *vma,
+			unsigned long addr, unsigned long end,
+			unsigned char *vec)
+{
+	unsigned long next;
+	pgd_t *pgd;
+
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			mincore_unmapped_range(vma, addr, next, vec);
+		else
+			mincore_pud_range(vma, pgd, addr, next, vec);
+		vec += (next - addr) >> PAGE_SHIFT;
+	} while (pgd++, addr = next, addr != end);
+}
+
+/*
+ * Do a chunk of "sys_mincore()". We've already checked
+ * all the arguments, we hold the mmap semaphore: we should
+ * just return the amount of info we're asked for.
+ */
+static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec)
+{
+	struct vm_area_struct *vma;
+	unsigned long end;
+
+	vma = find_vma(current->mm, addr);
+	if (!vma || addr < vma->vm_start)
+		return -ENOMEM;
+
+	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
+
+	if (is_vm_hugetlb_page(vma)) {
+		mincore_hugetlb_page_range(vma, addr, end, vec);
+		return (end - addr) >> PAGE_SHIFT;
 	}
 
-	return nr;
+	end = pmd_addr_end(addr, end);
+
+	if (is_vm_hugetlb_page(vma))
+		mincore_hugetlb_page_range(vma, addr, end, vec);
+	else
+		mincore_page_range(vma, addr, end, vec);
+
+	return (end - addr) >> PAGE_SHIFT;
 }
 
 /*
@@ -247,7 +286,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
 		 * the temporary buffer size.
 		 */
 		down_read(&current->mm->mmap_sem);
-		retval = do_mincore(start, tmp, min(pages, PAGE_SIZE));
+		retval = do_mincore(start, min(pages, PAGE_SIZE), tmp);
 		up_read(&current->mm->mmap_sem);
 
 		if (retval <= 0)
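
For context, a minimal userspace sketch of how the reworked do_mincore() path is exercised through the mincore(2) system call. The file argument, mapping length, and variable names below are illustrative only and not part of this patch; mincore() fills one status byte per page of the mapping, with bit 0 set when the page is resident in core.

/* Hypothetical usage sketch, not part of the patch: map a file read-only
 * and ask the kernel which of its pages are currently resident. */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>

int main(int argc, char **argv)
{
	long page = sysconf(_SC_PAGESIZE);
	size_t len = 16 * page;		/* arbitrary example length */
	int fd;
	void *map;
	unsigned char *vec;
	size_t i;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;
	map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	/* One status byte per page; bit 0 set means "page is in core". */
	vec = calloc(len / page, 1);
	if (vec && mincore(map, len, vec) == 0) {
		for (i = 0; i < len / page; i++)
			printf("page %zu: %s\n", i,
			       (vec[i] & 1) ? "in core" : "not in core");
	}

	free(vec);
	munmap(map, len);
	close(fd);
	return 0;
}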