diff options
Diffstat (limited to 'mm/mincore.c')
-rw-r--r-- | mm/mincore.c | 263 |
1 files changed, 151 insertions, 112 deletions
diff --git a/mm/mincore.c b/mm/mincore.c index f77433c20279..9ac42dc6d7b6 100644 --- a/mm/mincore.c +++ b/mm/mincore.c | |||
@@ -19,6 +19,40 @@ | |||
19 | #include <asm/uaccess.h> | 19 | #include <asm/uaccess.h> |
20 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
21 | 21 | ||
22 | static void mincore_hugetlb_page_range(struct vm_area_struct *vma, | ||
23 | unsigned long addr, unsigned long end, | ||
24 | unsigned char *vec) | ||
25 | { | ||
26 | #ifdef CONFIG_HUGETLB_PAGE | ||
27 | struct hstate *h; | ||
28 | |||
29 | h = hstate_vma(vma); | ||
30 | while (1) { | ||
31 | unsigned char present; | ||
32 | pte_t *ptep; | ||
33 | /* | ||
34 | * Huge pages are always in RAM for now, but | ||
35 | * theoretically it needs to be checked. | ||
36 | */ | ||
37 | ptep = huge_pte_offset(current->mm, | ||
38 | addr & huge_page_mask(h)); | ||
39 | present = ptep && !huge_pte_none(huge_ptep_get(ptep)); | ||
40 | while (1) { | ||
41 | *vec = present; | ||
42 | vec++; | ||
43 | addr += PAGE_SIZE; | ||
44 | if (addr == end) | ||
45 | return; | ||
46 | /* check hugepage border */ | ||
47 | if (!(addr & ~huge_page_mask(h))) | ||
48 | break; | ||
49 | } | ||
50 | } | ||
51 | #else | ||
52 | BUG(); | ||
53 | #endif | ||
54 | } | ||
55 | |||
22 | /* | 56 | /* |
23 | * Later we can get more picky about what "in core" means precisely. | 57 | * Later we can get more picky about what "in core" means precisely. |
24 | * For now, simply check to see if the page is in the page cache, | 58 | * For now, simply check to see if the page is in the page cache, |
@@ -49,145 +83,150 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) | |||
49 | return present; | 83 | return present; |
50 | } | 84 | } |
51 | 85 | ||
52 | /* | 86 | static void mincore_unmapped_range(struct vm_area_struct *vma, |
53 | * Do a chunk of "sys_mincore()". We've already checked | 87 | unsigned long addr, unsigned long end, |
54 | * all the arguments, we hold the mmap semaphore: we should | 88 | unsigned char *vec) |
55 | * just return the amount of info we're asked for. | ||
56 | */ | ||
57 | static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages) | ||
58 | { | 89 | { |
59 | pgd_t *pgd; | 90 | unsigned long nr = (end - addr) >> PAGE_SHIFT; |
60 | pud_t *pud; | ||
61 | pmd_t *pmd; | ||
62 | pte_t *ptep; | ||
63 | spinlock_t *ptl; | ||
64 | unsigned long nr; | ||
65 | int i; | 91 | int i; |
66 | pgoff_t pgoff; | ||
67 | struct vm_area_struct *vma = find_vma(current->mm, addr); | ||
68 | 92 | ||
69 | /* | 93 | if (vma->vm_file) { |
70 | * find_vma() didn't find anything above us, or we're | 94 | pgoff_t pgoff; |
71 | * in an unmapped hole in the address space: ENOMEM. | ||
72 | */ | ||
73 | if (!vma || addr < vma->vm_start) | ||
74 | return -ENOMEM; | ||
75 | |||
76 | #ifdef CONFIG_HUGETLB_PAGE | ||
77 | if (is_vm_hugetlb_page(vma)) { | ||
78 | struct hstate *h; | ||
79 | unsigned long nr_huge; | ||
80 | unsigned char present; | ||
81 | 95 | ||
82 | i = 0; | 96 | pgoff = linear_page_index(vma, addr); |
83 | nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT); | 97 | for (i = 0; i < nr; i++, pgoff++) |
84 | h = hstate_vma(vma); | 98 | vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff); |
85 | nr_huge = ((addr + pages * PAGE_SIZE - 1) >> huge_page_shift(h)) | 99 | } else { |
86 | - (addr >> huge_page_shift(h)) + 1; | 100 | for (i = 0; i < nr; i++) |
87 | nr_huge = min(nr_huge, | 101 | vec[i] = 0; |
88 | (vma->vm_end - addr) >> huge_page_shift(h)); | ||
89 | while (1) { | ||
90 | /* hugepage always in RAM for now, | ||
91 | * but generally it needs to be check */ | ||
92 | ptep = huge_pte_offset(current->mm, | ||
93 | addr & huge_page_mask(h)); | ||
94 | present = !!(ptep && | ||
95 | !huge_pte_none(huge_ptep_get(ptep))); | ||
96 | while (1) { | ||
97 | vec[i++] = present; | ||
98 | addr += PAGE_SIZE; | ||
99 | /* reach buffer limit */ | ||
100 | if (i == nr) | ||
101 | return nr; | ||
102 | /* check hugepage border */ | ||
103 | if (!((addr & ~huge_page_mask(h)) | ||
104 | >> PAGE_SHIFT)) | ||
105 | break; | ||
106 | } | ||
107 | } | ||
108 | return nr; | ||
109 | } | 102 | } |
110 | #endif | 103 | } |
111 | |||
112 | /* | ||
113 | * Calculate how many pages there are left in the last level of the | ||
114 | * PTE array for our address. | ||
115 | */ | ||
116 | nr = PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1)); | ||
117 | |||
118 | /* | ||
119 | * Don't overrun this vma | ||
120 | */ | ||
121 | nr = min(nr, (vma->vm_end - addr) >> PAGE_SHIFT); | ||
122 | |||
123 | /* | ||
124 | * Don't return more than the caller asked for | ||
125 | */ | ||
126 | nr = min(nr, pages); | ||
127 | 104 | ||
128 | pgd = pgd_offset(vma->vm_mm, addr); | 105 | static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd, |
129 | if (pgd_none_or_clear_bad(pgd)) | 106 | unsigned long addr, unsigned long end, |
130 | goto none_mapped; | 107 | unsigned char *vec) |
131 | pud = pud_offset(pgd, addr); | 108 | { |
132 | if (pud_none_or_clear_bad(pud)) | 109 | unsigned long next; |
133 | goto none_mapped; | 110 | spinlock_t *ptl; |
134 | pmd = pmd_offset(pud, addr); | 111 | pte_t *ptep; |
135 | if (pmd_none_or_clear_bad(pmd)) | ||
136 | goto none_mapped; | ||
137 | 112 | ||
138 | ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 113 | ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
139 | for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) { | 114 | do { |
140 | unsigned char present; | ||
141 | pte_t pte = *ptep; | 115 | pte_t pte = *ptep; |
116 | pgoff_t pgoff; | ||
142 | 117 | ||
143 | if (pte_present(pte)) { | 118 | next = addr + PAGE_SIZE; |
144 | present = 1; | 119 | if (pte_none(pte)) |
145 | 120 | mincore_unmapped_range(vma, addr, next, vec); | |
146 | } else if (pte_none(pte)) { | 121 | else if (pte_present(pte)) |
147 | if (vma->vm_file) { | 122 | *vec = 1; |
148 | pgoff = linear_page_index(vma, addr); | 123 | else if (pte_file(pte)) { |
149 | present = mincore_page(vma->vm_file->f_mapping, | ||
150 | pgoff); | ||
151 | } else | ||
152 | present = 0; | ||
153 | |||
154 | } else if (pte_file(pte)) { | ||
155 | pgoff = pte_to_pgoff(pte); | 124 | pgoff = pte_to_pgoff(pte); |
156 | present = mincore_page(vma->vm_file->f_mapping, pgoff); | 125 | *vec = mincore_page(vma->vm_file->f_mapping, pgoff); |
157 | |||
158 | } else { /* pte is a swap entry */ | 126 | } else { /* pte is a swap entry */ |
159 | swp_entry_t entry = pte_to_swp_entry(pte); | 127 | swp_entry_t entry = pte_to_swp_entry(pte); |
128 | |||
160 | if (is_migration_entry(entry)) { | 129 | if (is_migration_entry(entry)) { |
161 | /* migration entries are always uptodate */ | 130 | /* migration entries are always uptodate */ |
162 | present = 1; | 131 | *vec = 1; |
163 | } else { | 132 | } else { |
164 | #ifdef CONFIG_SWAP | 133 | #ifdef CONFIG_SWAP |
165 | pgoff = entry.val; | 134 | pgoff = entry.val; |
166 | present = mincore_page(&swapper_space, pgoff); | 135 | *vec = mincore_page(&swapper_space, pgoff); |
167 | #else | 136 | #else |
168 | WARN_ON(1); | 137 | WARN_ON(1); |
169 | present = 1; | 138 | *vec = 1; |
170 | #endif | 139 | #endif |
171 | } | 140 | } |
172 | } | 141 | } |
142 | vec++; | ||
143 | } while (ptep++, addr = next, addr != end); | ||
144 | pte_unmap_unlock(ptep - 1, ptl); | ||
145 | } | ||
173 | 146 | ||
174 | vec[i] = present; | 147 | static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud, |
175 | } | 148 | unsigned long addr, unsigned long end, |
176 | pte_unmap_unlock(ptep-1, ptl); | 149 | unsigned char *vec) |
150 | { | ||
151 | unsigned long next; | ||
152 | pmd_t *pmd; | ||
177 | 153 | ||
178 | return nr; | 154 | pmd = pmd_offset(pud, addr); |
155 | do { | ||
156 | next = pmd_addr_end(addr, end); | ||
157 | if (pmd_none_or_clear_bad(pmd)) | ||
158 | mincore_unmapped_range(vma, addr, next, vec); | ||
159 | else | ||
160 | mincore_pte_range(vma, pmd, addr, next, vec); | ||
161 | vec += (next - addr) >> PAGE_SHIFT; | ||
162 | } while (pmd++, addr = next, addr != end); | ||
163 | } | ||
179 | 164 | ||
180 | none_mapped: | 165 | static void mincore_pud_range(struct vm_area_struct *vma, pgd_t *pgd, |
181 | if (vma->vm_file) { | 166 | unsigned long addr, unsigned long end, |
182 | pgoff = linear_page_index(vma, addr); | 167 | unsigned char *vec) |
183 | for (i = 0; i < nr; i++, pgoff++) | 168 | { |
184 | vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff); | 169 | unsigned long next; |
185 | } else { | 170 | pud_t *pud; |
186 | for (i = 0; i < nr; i++) | 171 | |
187 | vec[i] = 0; | 172 | pud = pud_offset(pgd, addr); |
173 | do { | ||
174 | next = pud_addr_end(addr, end); | ||
175 | if (pud_none_or_clear_bad(pud)) | ||
176 | mincore_unmapped_range(vma, addr, next, vec); | ||
177 | else | ||
178 | mincore_pmd_range(vma, pud, addr, next, vec); | ||
179 | vec += (next - addr) >> PAGE_SHIFT; | ||
180 | } while (pud++, addr = next, addr != end); | ||
181 | } | ||
182 | |||
183 | static void mincore_page_range(struct vm_area_struct *vma, | ||
184 | unsigned long addr, unsigned long end, | ||
185 | unsigned char *vec) | ||
186 | { | ||
187 | unsigned long next; | ||
188 | pgd_t *pgd; | ||
189 | |||
190 | pgd = pgd_offset(vma->vm_mm, addr); | ||
191 | do { | ||
192 | next = pgd_addr_end(addr, end); | ||
193 | if (pgd_none_or_clear_bad(pgd)) | ||
194 | mincore_unmapped_range(vma, addr, next, vec); | ||
195 | else | ||
196 | mincore_pud_range(vma, pgd, addr, next, vec); | ||
197 | vec += (next - addr) >> PAGE_SHIFT; | ||
198 | } while (pgd++, addr = next, addr != end); | ||
199 | } | ||
200 | |||
201 | /* | ||
202 | * Do a chunk of "sys_mincore()". We've already checked | ||
203 | * all the arguments, we hold the mmap semaphore: we should | ||
204 | * just return the amount of info we're asked for. | ||
205 | */ | ||
206 | static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *vec) | ||
207 | { | ||
208 | struct vm_area_struct *vma; | ||
209 | unsigned long end; | ||
210 | |||
211 | vma = find_vma(current->mm, addr); | ||
212 | if (!vma || addr < vma->vm_start) | ||
213 | return -ENOMEM; | ||
214 | |||
215 | end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); | ||
216 | |||
217 | if (is_vm_hugetlb_page(vma)) { | ||
218 | mincore_hugetlb_page_range(vma, addr, end, vec); | ||
219 | return (end - addr) >> PAGE_SHIFT; | ||
188 | } | 220 | } |
189 | 221 | ||
190 | return nr; | 222 | end = pmd_addr_end(addr, end); |
223 | |||
224 | if (is_vm_hugetlb_page(vma)) | ||
225 | mincore_hugetlb_page_range(vma, addr, end, vec); | ||
226 | else | ||
227 | mincore_page_range(vma, addr, end, vec); | ||
228 | |||
229 | return (end - addr) >> PAGE_SHIFT; | ||
191 | } | 230 | } |
192 | 231 | ||
193 | /* | 232 | /* |
@@ -247,7 +286,7 @@ SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, | |||
247 | * the temporary buffer size. | 286 | * the temporary buffer size. |
248 | */ | 287 | */ |
249 | down_read(&current->mm->mmap_sem); | 288 | down_read(&current->mm->mmap_sem); |
250 | retval = do_mincore(start, tmp, min(pages, PAGE_SIZE)); | 289 | retval = do_mincore(start, min(pages, PAGE_SIZE), tmp); |
251 | up_read(&current->mm->mmap_sem); | 290 | up_read(&current->mm->mmap_sem); |
252 | 291 | ||
253 | if (retval <= 0) | 292 | if (retval <= 0) |