Diffstat (limited to 'fs/proc/task_mmu.c')
-rw-r--r--	fs/proc/task_mmu.c	676
 1 file changed, 436 insertions(+), 240 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8043a3eab52..38338ed98cc 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,7 +5,10 @@
 #include <linux/highmem.h>
 #include <linux/ptrace.h>
 #include <linux/pagemap.h>
+#include <linux/ptrace.h>
 #include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 
 #include <asm/elf.h>
 #include <asm/uaccess.h>
@@ -114,24 +117,124 @@ static void pad_len_spaces(struct seq_file *m, int len)
 	seq_printf(m, "%*c", len, ' ');
 }
 
-struct mem_size_stats
+static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
 {
-	unsigned long resident;
-	unsigned long shared_clean;
-	unsigned long shared_dirty;
-	unsigned long private_clean;
-	unsigned long private_dirty;
-	unsigned long referenced;
-};
+	if (vma && vma != priv->tail_vma) {
+		struct mm_struct *mm = vma->vm_mm;
+		up_read(&mm->mmap_sem);
+		mmput(mm);
+	}
+}
 
-struct pmd_walker {
-	struct vm_area_struct *vma;
-	void *private;
-	void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
-		       unsigned long, void *);
-};
+static void *m_start(struct seq_file *m, loff_t *pos)
+{
+	struct proc_maps_private *priv = m->private;
+	unsigned long last_addr = m->version;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma, *tail_vma = NULL;
+	loff_t l = *pos;
+
+	/* Clear the per syscall fields in priv */
+	priv->task = NULL;
+	priv->tail_vma = NULL;
+
+	/*
+	 * We remember last_addr rather than next_addr to hit with
+	 * mmap_cache most of the time. We have zero last_addr at
+	 * the beginning and also after lseek. We will have -1 last_addr
+	 * after the end of the vmas.
+	 */
+
+	if (last_addr == -1UL)
+		return NULL;
+
+	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+	if (!priv->task)
+		return NULL;
+
+	mm = mm_for_maps(priv->task);
+	if (!mm)
+		return NULL;
+
+	tail_vma = get_gate_vma(priv->task);
+	priv->tail_vma = tail_vma;
+
+	/* Start with last addr hint */
+	vma = find_vma(mm, last_addr);
+	if (last_addr && vma) {
+		vma = vma->vm_next;
+		goto out;
+	}
+
+	/*
+	 * Check the vma index is within the range and do
+	 * sequential scan until m_index.
+	 */
+	vma = NULL;
+	if ((unsigned long)l < mm->map_count) {
+		vma = mm->mmap;
+		while (l-- && vma)
+			vma = vma->vm_next;
+		goto out;
+	}
+
+	if (l != mm->map_count)
+		tail_vma = NULL; /* After gate vma */
+
+out:
+	if (vma)
+		return vma;
+
+	/* End of vmas has been reached */
+	m->version = (tail_vma != NULL)? 0: -1UL;
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+	return tail_vma;
+}
 
-static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
+static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct proc_maps_private *priv = m->private;
+	struct vm_area_struct *vma = v;
+	struct vm_area_struct *tail_vma = priv->tail_vma;
+
+	(*pos)++;
+	if (vma && (vma != tail_vma) && vma->vm_next)
+		return vma->vm_next;
+	vma_stop(priv, vma);
+	return (vma != tail_vma)? tail_vma: NULL;
+}
+
+static void m_stop(struct seq_file *m, void *v)
+{
+	struct proc_maps_private *priv = m->private;
+	struct vm_area_struct *vma = v;
+
+	vma_stop(priv, vma);
+	if (priv->task)
+		put_task_struct(priv->task);
+}
+
+static int do_maps_open(struct inode *inode, struct file *file,
+			struct seq_operations *ops)
+{
+	struct proc_maps_private *priv;
+	int ret = -ENOMEM;
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (priv) {
+		priv->pid = proc_pid(inode);
+		ret = seq_open(file, ops);
+		if (!ret) {
+			struct seq_file *m = file->private_data;
+			m->private = priv;
+		} else {
+			kfree(priv);
+		}
+	}
+	return ret;
+}
+
+static int show_map(struct seq_file *m, void *v)
 {
 	struct proc_maps_private *priv = m->private;
 	struct task_struct *task = priv->task;
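The m_start()/m_next()/m_stop() trio added above is standard seq_file plumbing: *pos is the VMA index, while m->version caches the start address of the last VMA that was fully emitted, so a later read() can resume with a single find_vma() (which usually hits mmap_cache) instead of rescanning the list from mm->mmap. A hedged userspace illustration of the resulting behaviour (not part of the patch): reading the file in small pieces still yields each mapping once, because each re-entry into m_start() picks up where the previous batch left off.

/* Illustration only: read /proc/self/maps in small chunks; the seq_file
 * machinery above resumes the VMA walk between read() calls. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[128];		/* deliberately tiny user buffer */
	ssize_t n;
	int fd = open("/proc/self/maps", O_RDONLY);

	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);	/* mappings are not repeated between chunks */
	close(fd);
	return 0;
}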
@@ -191,41 +294,71 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
 	}
 	seq_putc(m, '\n');
 
-	if (mss)
-		seq_printf(m,
-			   "Size:           %8lu kB\n"
-			   "Rss:            %8lu kB\n"
-			   "Shared_Clean:   %8lu kB\n"
-			   "Shared_Dirty:   %8lu kB\n"
-			   "Private_Clean:  %8lu kB\n"
-			   "Private_Dirty:  %8lu kB\n"
-			   "Referenced:     %8lu kB\n",
-			   (vma->vm_end - vma->vm_start) >> 10,
-			   mss->resident >> 10,
-			   mss->shared_clean >> 10,
-			   mss->shared_dirty >> 10,
-			   mss->private_clean >> 10,
-			   mss->private_dirty >> 10,
-			   mss->referenced >> 10);
-
 	if (m->count < m->size) /* vma is copied successfully */
 		m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
 	return 0;
 }
 
-static int show_map(struct seq_file *m, void *v)
+static struct seq_operations proc_pid_maps_op = {
+	.start = m_start,
+	.next = m_next,
+	.stop = m_stop,
+	.show = show_map
+};
+
+static int maps_open(struct inode *inode, struct file *file)
 {
-	return show_map_internal(m, v, NULL);
+	return do_maps_open(inode, file, &proc_pid_maps_op);
 }
 
-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-			    unsigned long addr, unsigned long end,
-			    void *private)
+const struct file_operations proc_maps_operations = {
+	.open = maps_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+/*
+ * Proportional Set Size(PSS): my share of RSS.
+ *
+ * PSS of a process is the count of pages it has in memory, where each
+ * page is divided by the number of processes sharing it. So if a
+ * process has 1000 pages all to itself, and 1000 shared with one other
+ * process, its PSS will be 1500.
+ *
+ * To keep (accumulated) division errors low, we adopt a 64bit
+ * fixed-point pss counter to minimize division errors. So (pss >>
+ * PSS_SHIFT) would be the real byte count.
+ *
+ * A shift of 12 before division means (assuming 4K page size):
+ * 	- 1M 3-user-pages add up to 8KB errors;
+ * 	- supports mapcount up to 2^24, or 16M;
+ * 	- supports PSS up to 2^52 bytes, or 4PB.
+ */
+#define PSS_SHIFT 12
+
+#ifdef CONFIG_PROC_PAGE_MONITOR
+struct mem_size_stats
+{
+	struct vm_area_struct *vma;
+	unsigned long resident;
+	unsigned long shared_clean;
+	unsigned long shared_dirty;
+	unsigned long private_clean;
+	unsigned long private_dirty;
+	unsigned long referenced;
+	u64 pss;
+};
+
+static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			   void *private)
 {
 	struct mem_size_stats *mss = private;
+	struct vm_area_struct *vma = mss->vma;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
+	int mapcount;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
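The PSS comment above can be made concrete with a small stand-alone calculation (illustration only; the 4K page size and the PSS_SHIFT value are taken from the comment, nothing here is kernel code): every resident page contributes (PAGE_SIZE << PSS_SHIFT) / mapcount to the 64-bit accumulator, and the value userspace eventually sees is that accumulator shifted back down.

/* Worked example of the fixed-point PSS arithmetic described above. */
#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 4096ULL	/* assumed 4K pages, as in the comment */
#define PSS_SHIFT 12

int main(void)
{
	unsigned long long pss = 0;
	int i;

	for (i = 0; i < 1000; i++)	/* 1000 pages mapped only by this process */
		pss += EXAMPLE_PAGE_SIZE << PSS_SHIFT;
	for (i = 0; i < 1000; i++)	/* 1000 pages shared with one other process */
		pss += (EXAMPLE_PAGE_SIZE << PSS_SHIFT) / 2;

	/* 1000 + 1000/2 = 1500 pages, i.e. 6000 kB with 4K pages */
	printf("Pss: %llu kB\n", pss >> (10 + PSS_SHIFT));
	return 0;
}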
@@ -242,26 +375,88 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		/* Accumulate the size in pages that have been accessed. */
 		if (pte_young(ptent) || PageReferenced(page))
 			mss->referenced += PAGE_SIZE;
-		if (page_mapcount(page) >= 2) {
+		mapcount = page_mapcount(page);
+		if (mapcount >= 2) {
 			if (pte_dirty(ptent))
 				mss->shared_dirty += PAGE_SIZE;
 			else
 				mss->shared_clean += PAGE_SIZE;
+			mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
 		} else {
 			if (pte_dirty(ptent))
 				mss->private_dirty += PAGE_SIZE;
 			else
 				mss->private_clean += PAGE_SIZE;
+			mss->pss += (PAGE_SIZE << PSS_SHIFT);
 		}
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
+	return 0;
 }
 
-static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-				 unsigned long addr, unsigned long end,
-				 void *private)
+static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
+
+static int show_smap(struct seq_file *m, void *v)
 {
+	struct vm_area_struct *vma = v;
+	struct mem_size_stats mss;
+	int ret;
+
+	memset(&mss, 0, sizeof mss);
+	mss.vma = vma;
+	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+		walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
+				&smaps_walk, &mss);
+
+	ret = show_map(m, v);
+	if (ret)
+		return ret;
+
+	seq_printf(m,
+		   "Size:           %8lu kB\n"
+		   "Rss:            %8lu kB\n"
+		   "Pss:            %8lu kB\n"
+		   "Shared_Clean:   %8lu kB\n"
+		   "Shared_Dirty:   %8lu kB\n"
+		   "Private_Clean:  %8lu kB\n"
+		   "Private_Dirty:  %8lu kB\n"
+		   "Referenced:     %8lu kB\n",
+		   (vma->vm_end - vma->vm_start) >> 10,
+		   mss.resident >> 10,
+		   (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
+		   mss.shared_clean >> 10,
+		   mss.shared_dirty >> 10,
+		   mss.private_clean >> 10,
+		   mss.private_dirty >> 10,
+		   mss.referenced >> 10);
+
+	return ret;
+}
+
+static struct seq_operations proc_pid_smaps_op = {
+	.start = m_start,
+	.next = m_next,
+	.stop = m_stop,
+	.show = show_smap
+};
+
+static int smaps_open(struct inode *inode, struct file *file)
+{
+	return do_maps_open(inode, file, &proc_pid_smaps_op);
+}
+
+const struct file_operations proc_smaps_operations = {
+	.open = smaps_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, void *private)
+{
+	struct vm_area_struct *vma = private;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
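show_smap() above emits the new Pss field alongside the existing counters, converting the accumulator with pss >> (10 + PSS_SHIFT) so the value is in kB like its neighbours. A minimal userspace sketch (illustration only, not part of the patch) that totals the per-mapping Pss values into a whole-process figure:

/* Sum the "Pss:" lines of /proc/<pid>/smaps; field name as added by this patch. */
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[64], line[256];
	unsigned long kb, total = 0;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%s/smaps", argc > 1 ? argv[1] : "self");
	f = fopen(path, "r");
	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		if (sscanf(line, "Pss: %lu kB", &kb) == 1)	/* whitespace run matched by ' ' */
			total += kb;
	fclose(f);
	printf("total Pss: %lu kB\n", total);
	return 0;
}

Because each shared page is split between its users, these per-process totals can be added across processes without double-counting shared memory, which is the point of the metric.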
@@ -282,235 +477,248 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
+	return 0;
 }
 
-static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
-				  unsigned long addr, unsigned long end)
+static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
+
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+				size_t count, loff_t *ppos)
 {
-	pmd_t *pmd;
-	unsigned long next;
+	struct task_struct *task;
+	char buffer[PROC_NUMBUF], *end;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
 
-	for (pmd = pmd_offset(pud, addr); addr != end;
-	     pmd++, addr = next) {
-		next = pmd_addr_end(addr, end);
-		if (pmd_none_or_clear_bad(pmd))
-			continue;
-		walker->action(walker->vma, pmd, addr, next, walker->private);
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+	if (!simple_strtol(buffer, &end, 0))
+		return -EINVAL;
+	if (*end == '\n')
+		end++;
+	task = get_proc_task(file->f_path.dentry->d_inode);
+	if (!task)
+		return -ESRCH;
+	mm = get_task_mm(task);
+	if (mm) {
+		down_read(&mm->mmap_sem);
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
+			if (!is_vm_hugetlb_page(vma))
+				walk_page_range(mm, vma->vm_start, vma->vm_end,
+						&clear_refs_walk, vma);
+		flush_tlb_mm(mm);
+		up_read(&mm->mmap_sem);
+		mmput(mm);
 	}
+	put_task_struct(task);
+	if (end - buffer == 0)
+		return -EIO;
+	return end - buffer;
 }
 
-static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
-				  unsigned long addr, unsigned long end)
-{
-	pud_t *pud;
-	unsigned long next;
+const struct file_operations proc_clear_refs_operations = {
+	.write = clear_refs_write,
+};
 
-	for (pud = pud_offset(pgd, addr); addr != end;
-	     pud++, addr = next) {
-		next = pud_addr_end(addr, end);
-		if (pud_none_or_clear_bad(pud))
-			continue;
-		walk_pmd_range(walker, pud, addr, next);
+struct pagemapread {
+	char __user *out, *end;
+};
+
+#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_RESERVED_BITS 3
+#define PM_RESERVED_OFFSET (64 - PM_RESERVED_BITS)
+#define PM_RESERVED_MASK (((1LL<<PM_RESERVED_BITS)-1) << PM_RESERVED_OFFSET)
+#define PM_SPECIAL(nr) (((nr) << PM_RESERVED_OFFSET) | PM_RESERVED_MASK)
+#define PM_NOT_PRESENT PM_SPECIAL(1LL)
+#define PM_SWAP PM_SPECIAL(2LL)
+#define PM_END_OF_BUFFER 1
+
+static int add_to_pagemap(unsigned long addr, u64 pfn,
+			  struct pagemapread *pm)
+{
+	/*
+	 * Make sure there's room in the buffer for an
+	 * entire entry. Otherwise, only copy part of
+	 * the pfn.
+	 */
+	if (pm->out + PM_ENTRY_BYTES >= pm->end) {
+		if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
+			return -EFAULT;
+		pm->out = pm->end;
+		return PM_END_OF_BUFFER;
 	}
+
+	if (put_user(pfn, pm->out))
+		return -EFAULT;
+	pm->out += PM_ENTRY_BYTES;
+	return 0;
 }
 
-/*
- * walk_page_range - walk the page tables of a VMA with a callback
- * @vma - VMA to walk
- * @action - callback invoked for every bottom-level (PTE) page table
- * @private - private data passed to the callback function
- *
- * Recursively walk the page table for the memory area in a VMA, calling
- * a callback for every bottom-level (PTE) page table.
- */
-static inline void walk_page_range(struct vm_area_struct *vma,
-				   void (*action)(struct vm_area_struct *,
-						  pmd_t *, unsigned long,
-						  unsigned long, void *),
-				   void *private)
+static int pagemap_pte_hole(unsigned long start, unsigned long end,
+			    void *private)
 {
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
-	struct pmd_walker walker = {
-		.vma = vma,
-		.private = private,
-		.action = action,
-	};
-	pgd_t *pgd;
-	unsigned long next;
-
-	for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
-	     pgd++, addr = next) {
-		next = pgd_addr_end(addr, end);
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		walk_pud_range(&walker, pgd, addr, next);
+	struct pagemapread *pm = private;
+	unsigned long addr;
+	int err = 0;
+	for (addr = start; addr < end; addr += PAGE_SIZE) {
+		err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
+		if (err)
+			break;
 	}
+	return err;
 }
 
-static int show_smap(struct seq_file *m, void *v)
+u64 swap_pte_to_pagemap_entry(pte_t pte)
 {
-	struct vm_area_struct *vma = v;
-	struct mem_size_stats mss;
-
-	memset(&mss, 0, sizeof mss);
-	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-		walk_page_range(vma, smaps_pte_range, &mss);
-	return show_map_internal(m, v, &mss);
+	swp_entry_t e = pte_to_swp_entry(pte);
+	return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
 }
 
-void clear_refs_smap(struct mm_struct *mm)
+static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			     void *private)
 {
-	struct vm_area_struct *vma;
+	struct pagemapread *pm = private;
+	pte_t *pte;
+	int err = 0;
+
+	for (; addr != end; addr += PAGE_SIZE) {
+		u64 pfn = PM_NOT_PRESENT;
+		pte = pte_offset_map(pmd, addr);
+		if (is_swap_pte(*pte))
+			pfn = swap_pte_to_pagemap_entry(*pte);
+		else if (pte_present(*pte))
+			pfn = pte_pfn(*pte);
+		/* unmap so we're not in atomic when we copy to userspace */
+		pte_unmap(pte);
+		err = add_to_pagemap(addr, pfn, pm);
+		if (err)
+			return err;
+	}
 
-	down_read(&mm->mmap_sem);
-	for (vma = mm->mmap; vma; vma = vma->vm_next)
-		if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-			walk_page_range(vma, clear_refs_pte_range, NULL);
-	flush_tlb_mm(mm);
-	up_read(&mm->mmap_sem);
+	cond_resched();
+
+	return err;
 }
 
-static void *m_start(struct seq_file *m, loff_t *pos)
+static struct mm_walk pagemap_walk = {
+	.pmd_entry = pagemap_pte_range,
+	.pte_hole = pagemap_pte_hole
+};
+
+/*
+ * /proc/pid/pagemap - an array mapping virtual pages to pfns
+ *
+ * For each page in the address space, this file contains one 64-bit
+ * entry representing the corresponding physical page frame number
+ * (PFN) if the page is present. If there is a swap entry for the
+ * physical page, then an encoding of the swap file number and the
+ * page's offset into the swap file are returned. If no page is
+ * present at all, PM_NOT_PRESENT is returned. This allows determining
+ * precisely which pages are mapped (or in swap) and comparing mapped
+ * pages between processes.
+ *
+ * Efficient users of this interface will use /proc/pid/maps to
+ * determine which areas of memory are actually mapped and llseek to
+ * skip over unmapped regions.
+ */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
 {
-	struct proc_maps_private *priv = m->private;
-	unsigned long last_addr = m->version;
+	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
+	struct page **pages, *page;
+	unsigned long uaddr, uend;
 	struct mm_struct *mm;
-	struct vm_area_struct *vma, *tail_vma = NULL;
-	loff_t l = *pos;
-
-	/* Clear the per syscall fields in priv */
-	priv->task = NULL;
-	priv->tail_vma = NULL;
+	struct pagemapread pm;
+	int pagecount;
+	int ret = -ESRCH;
 
-	/*
-	 * We remember last_addr rather than next_addr to hit with
-	 * mmap_cache most of the time. We have zero last_addr at
-	 * the beginning and also after lseek. We will have -1 last_addr
-	 * after the end of the vmas.
-	 */
+	if (!task)
+		goto out;
 
-	if (last_addr == -1UL)
-		return NULL;
+	ret = -EACCES;
+	if (!ptrace_may_attach(task))
+		goto out;
 
-	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
-	if (!priv->task)
-		return NULL;
+	ret = -EINVAL;
+	/* file position must be aligned */
+	if (*ppos % PM_ENTRY_BYTES)
+		goto out;
 
-	mm = mm_for_maps(priv->task);
+	ret = 0;
+	mm = get_task_mm(task);
 	if (!mm)
-		return NULL;
-
-	priv->tail_vma = tail_vma = get_gate_vma(priv->task);
-
-	/* Start with last addr hint */
-	if (last_addr && (vma = find_vma(mm, last_addr))) {
-		vma = vma->vm_next;
 		goto out;
-	}
 
-	/*
-	 * Check the vma index is within the range and do
-	 * sequential scan until m_index.
-	 */
-	vma = NULL;
-	if ((unsigned long)l < mm->map_count) {
-		vma = mm->mmap;
-		while (l-- && vma)
-			vma = vma->vm_next;
-		goto out;
-	}
+	ret = -ENOMEM;
+	uaddr = (unsigned long)buf & PAGE_MASK;
+	uend = (unsigned long)(buf + count);
+	pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
+	pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out_task;
 
-	if (l != mm->map_count)
-		tail_vma = NULL; /* After gate vma */
+	down_read(&current->mm->mmap_sem);
+	ret = get_user_pages(current, current->mm, uaddr, pagecount,
+			     1, 0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
 
-out:
-	if (vma)
-		return vma;
+	if (ret < 0)
+		goto out_free;
 
-	/* End of vmas has been reached */
-	m->version = (tail_vma != NULL)? 0: -1UL;
-	up_read(&mm->mmap_sem);
-	mmput(mm);
-	return tail_vma;
-}
+	pm.out = buf;
+	pm.end = buf + count;
 
-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
-{
-	if (vma && vma != priv->tail_vma) {
-		struct mm_struct *mm = vma->vm_mm;
-		up_read(&mm->mmap_sem);
-		mmput(mm);
+	if (!ptrace_may_attach(task)) {
+		ret = -EIO;
+	} else {
+		unsigned long src = *ppos;
+		unsigned long svpfn = src / PM_ENTRY_BYTES;
+		unsigned long start_vaddr = svpfn << PAGE_SHIFT;
+		unsigned long end_vaddr = TASK_SIZE_OF(task);
+
+		/* watch out for wraparound */
+		if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
+			start_vaddr = end_vaddr;
+
+		/*
+		 * The odds are that this will stop walking way
+		 * before end_vaddr, because the length of the
+		 * user buffer is tracked in "pm", and the walk
+		 * will stop when we hit the end of the buffer.
+		 */
+		ret = walk_page_range(mm, start_vaddr, end_vaddr,
+					&pagemap_walk, &pm);
+		if (ret == PM_END_OF_BUFFER)
+			ret = 0;
+		/* don't need mmap_sem for these, but this looks cleaner */
+		*ppos += pm.out - buf;
+		if (!ret)
+			ret = pm.out - buf;
 	}
-}
-
-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *vma = v;
-	struct vm_area_struct *tail_vma = priv->tail_vma;
-
-	(*pos)++;
-	if (vma && (vma != tail_vma) && vma->vm_next)
-		return vma->vm_next;
-	vma_stop(priv, vma);
-	return (vma != tail_vma)? tail_vma: NULL;
-}
-
-static void m_stop(struct seq_file *m, void *v)
-{
-	struct proc_maps_private *priv = m->private;
-	struct vm_area_struct *vma = v;
 
-	vma_stop(priv, vma);
-	if (priv->task)
-		put_task_struct(priv->task);
-}
-
-static struct seq_operations proc_pid_maps_op = {
-	.start = m_start,
-	.next = m_next,
-	.stop = m_stop,
-	.show = show_map
-};
-
-static struct seq_operations proc_pid_smaps_op = {
-	.start = m_start,
-	.next = m_next,
-	.stop = m_stop,
-	.show = show_smap
-};
-
-static int do_maps_open(struct inode *inode, struct file *file,
-			struct seq_operations *ops)
-{
-	struct proc_maps_private *priv;
-	int ret = -ENOMEM;
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (priv) {
-		priv->pid = proc_pid(inode);
-		ret = seq_open(file, ops);
-		if (!ret) {
-			struct seq_file *m = file->private_data;
-			m->private = priv;
-		} else {
-			kfree(priv);
-		}
+	for (; pagecount; pagecount--) {
+		page = pages[pagecount-1];
+		if (!PageReserved(page))
+			SetPageDirty(page);
+		page_cache_release(page);
 	}
+	mmput(mm);
+out_free:
+	kfree(pages);
+out_task:
+	put_task_struct(task);
+out:
 	return ret;
 }
 
-static int maps_open(struct inode *inode, struct file *file)
-{
-	return do_maps_open(inode, file, &proc_pid_maps_op);
-}
-
-const struct file_operations proc_maps_operations = {
-	.open = maps_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_private,
+const struct file_operations proc_pagemap_operations = {
+	.llseek = mem_lseek, /* borrow this */
+	.read = pagemap_read,
 };
+#endif /* CONFIG_PROC_PAGE_MONITOR */
 
 #ifdef CONFIG_NUMA
 extern int show_numa_map(struct seq_file *m, void *v);
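The comment above pagemap_read() describes the intended usage: consult /proc/pid/maps for the mapped ranges, then seek to (vaddr / PAGE_SIZE) * 8 in /proc/pid/pagemap and read 64-bit entries. A hedged userspace sketch of a single lookup (illustration only; the entry encoding here, a raw PFN or a PM_SPECIAL value, is the one defined by this patch and was changed in later kernels):

/* Look up the pagemap entry for one virtual address of the current task. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	long psize = sysconf(_SC_PAGESIZE);
	unsigned long vaddr = (unsigned long)&psize;	/* any address we have touched */
	uint64_t entry;
	int fd = open("/proc/self/pagemap", O_RDONLY);

	if (fd < 0)
		return 1;
	/* one 8-byte entry per virtual page; seek to that page's slot */
	if (lseek(fd, (vaddr / psize) * sizeof(entry), SEEK_SET) == (off_t)-1 ||
	    read(fd, &entry, sizeof(entry)) != sizeof(entry)) {
		close(fd);
		return 1;
	}
	printf("vaddr %#lx -> pagemap entry %#llx\n", vaddr, (unsigned long long)entry);
	close(fd);
	return 0;
}

The clear_refs file added in the same hunk is the companion interface: writing a non-zero value (conventionally echo 1 > /proc/pid/clear_refs) walks every VMA with clear_refs_walk and clears the referenced bits, so the Referenced counter in smaps afterwards reflects only pages touched since that write.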
@@ -545,15 +753,3 @@ const struct file_operations proc_numa_maps_operations = {
 	.release = seq_release_private,
 };
 #endif
-
-static int smaps_open(struct inode *inode, struct file *file)
-{
-	return do_maps_open(inode, file, &proc_pid_smaps_op);
-}
-
-const struct file_operations proc_smaps_operations = {
-	.open = smaps_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = seq_release_private,
-};