aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/internal.h2
-rw-r--r--fs/proc/task_mmu.c200
3 files changed, 204 insertions, 2 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1bd646d3fe9a..9004db04efa0 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -783,7 +783,7 @@ out_no_task:
783} 783}
784#endif 784#endif
785 785
786static loff_t mem_lseek(struct file * file, loff_t offset, int orig) 786loff_t mem_lseek(struct file *file, loff_t offset, int orig)
787{ 787{
788 switch (orig) { 788 switch (orig) {
789 case 0: 789 case 0:
@@ -2252,6 +2252,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2252#ifdef CONFIG_MMU 2252#ifdef CONFIG_MMU
2253 REG("clear_refs", S_IWUSR, clear_refs), 2253 REG("clear_refs", S_IWUSR, clear_refs),
2254 REG("smaps", S_IRUGO, smaps), 2254 REG("smaps", S_IRUGO, smaps),
2255 REG("pagemap", S_IRUSR, pagemap),
2255#endif 2256#endif
2256#ifdef CONFIG_SECURITY 2257#ifdef CONFIG_SECURITY
2257 DIR("attr", S_IRUGO|S_IXUGO, attr_dir), 2258 DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
@@ -2580,6 +2581,7 @@ static const struct pid_entry tid_base_stuff[] = {
2580#ifdef CONFIG_MMU 2581#ifdef CONFIG_MMU
2581 REG("clear_refs", S_IWUSR, clear_refs), 2582 REG("clear_refs", S_IWUSR, clear_refs),
2582 REG("smaps", S_IRUGO, smaps), 2583 REG("smaps", S_IRUGO, smaps),
2584 REG("pagemap", S_IRUSR, pagemap),
2583#endif 2585#endif
2584#ifdef CONFIG_SECURITY 2586#ifdef CONFIG_SECURITY
2585 DIR("attr", S_IRUGO|S_IXUGO, attr_dir), 2587 DIR("attr", S_IRUGO|S_IXUGO, attr_dir),
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index ddfaeec37492..7d57e8069924 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -52,11 +52,13 @@ extern int proc_tid_stat(struct task_struct *, char *);
52extern int proc_tgid_stat(struct task_struct *, char *); 52extern int proc_tgid_stat(struct task_struct *, char *);
53extern int proc_pid_status(struct task_struct *, char *); 53extern int proc_pid_status(struct task_struct *, char *);
54extern int proc_pid_statm(struct task_struct *, char *); 54extern int proc_pid_statm(struct task_struct *, char *);
55extern loff_t mem_lseek(struct file *file, loff_t offset, int orig);
55 56
56extern const struct file_operations proc_maps_operations; 57extern const struct file_operations proc_maps_operations;
57extern const struct file_operations proc_numa_maps_operations; 58extern const struct file_operations proc_numa_maps_operations;
58extern const struct file_operations proc_smaps_operations; 59extern const struct file_operations proc_smaps_operations;
59extern const struct file_operations proc_clear_refs_operations; 60extern const struct file_operations proc_clear_refs_operations;
61extern const struct file_operations proc_pagemap_operations;
60 62
61void free_proc_entry(struct proc_dir_entry *de); 63void free_proc_entry(struct proc_dir_entry *de);
62 64
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 308fc5451e43..bbd9b145051d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -5,7 +5,10 @@
5#include <linux/highmem.h> 5#include <linux/highmem.h>
6#include <linux/ptrace.h> 6#include <linux/ptrace.h>
7#include <linux/pagemap.h> 7#include <linux/pagemap.h>
8#include <linux/ptrace.h>
8#include <linux/mempolicy.h> 9#include <linux/mempolicy.h>
10#include <linux/swap.h>
11#include <linux/swapops.h>
9 12
10#include <asm/elf.h> 13#include <asm/elf.h>
11#include <asm/uaccess.h> 14#include <asm/uaccess.h>
@@ -519,6 +522,202 @@ const struct file_operations proc_clear_refs_operations = {
519 .write = clear_refs_write, 522 .write = clear_refs_write,
520}; 523};
521 524
525struct pagemapread {
526 char __user *out, *end;
527};
528
/*
 * Layout of a pagemap entry: one 64-bit word per page.  The top
 * PM_RESERVED_BITS bits carry a marker for entries that are not a plain
 * PFN; the remaining low bits carry the payload.
 *
 * PM_SPECIAL() must *mask* the shifted marker with PM_RESERVED_MASK.
 * OR-ing the full mask in (as was done before) sets every reserved bit
 * regardless of nr, which made PM_NOT_PRESENT and PM_SWAP identical.
 * The arithmetic is done unsigned so that shifting into bit 63 is
 * well defined.
 */
#define PM_ENTRY_BYTES		sizeof(u64)
#define PM_RESERVED_BITS	3
#define PM_RESERVED_OFFSET	(64 - PM_RESERVED_BITS)
#define PM_RESERVED_MASK	(((1ULL << PM_RESERVED_BITS) - 1) << PM_RESERVED_OFFSET)
#define PM_SPECIAL(nr)		(((unsigned long long)(nr) << PM_RESERVED_OFFSET) & PM_RESERVED_MASK)
#define PM_NOT_PRESENT		PM_SPECIAL(1LL)
#define PM_SWAP			PM_SPECIAL(2LL)
/* positive, non-error return used to stop the walk once the buffer is full */
#define PM_END_OF_BUFFER	1
537
538static int add_to_pagemap(unsigned long addr, u64 pfn,
539 struct pagemapread *pm)
540{
541 /*
542 * Make sure there's room in the buffer for an
543 * entire entry. Otherwise, only copy part of
544 * the pfn.
545 */
546 if (pm->out + PM_ENTRY_BYTES >= pm->end) {
547 if (copy_to_user(pm->out, &pfn, pm->end - pm->out))
548 return -EFAULT;
549 pm->out = pm->end;
550 return PM_END_OF_BUFFER;
551 }
552
553 if (put_user(pfn, pm->out))
554 return -EFAULT;
555 pm->out += PM_ENTRY_BYTES;
556 return 0;
557}
558
559static int pagemap_pte_hole(unsigned long start, unsigned long end,
560 void *private)
561{
562 struct pagemapread *pm = private;
563 unsigned long addr;
564 int err = 0;
565 for (addr = start; addr < end; addr += PAGE_SIZE) {
566 err = add_to_pagemap(addr, PM_NOT_PRESENT, pm);
567 if (err)
568 break;
569 }
570 return err;
571}
572
573u64 swap_pte_to_pagemap_entry(pte_t pte)
574{
575 swp_entry_t e = pte_to_swp_entry(pte);
576 return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT);
577}
578
579static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
580 void *private)
581{
582 struct pagemapread *pm = private;
583 pte_t *pte;
584 int err = 0;
585
586 for (; addr != end; addr += PAGE_SIZE) {
587 u64 pfn = PM_NOT_PRESENT;
588 pte = pte_offset_map(pmd, addr);
589 if (is_swap_pte(*pte))
590 pfn = swap_pte_to_pagemap_entry(*pte);
591 else if (pte_present(*pte))
592 pfn = pte_pfn(*pte);
593 /* unmap so we're not in atomic when we copy to userspace */
594 pte_unmap(pte);
595 err = add_to_pagemap(addr, pfn, pm);
596 if (err)
597 return err;
598 }
599
600 cond_resched();
601
602 return err;
603}
604
605static struct mm_walk pagemap_walk = {
606 .pmd_entry = pagemap_pte_range,
607 .pte_hole = pagemap_pte_hole
608};
609
610/*
611 * /proc/pid/pagemap - an array mapping virtual pages to pfns
612 *
613 * For each page in the address space, this file contains one 64-bit
614 * entry representing the corresponding physical page frame number
615 * (PFN) if the page is present. If there is a swap entry for the
616 * physical page, then an encoding of the swap file number and the
617 * page's offset into the swap file are returned. If no page is
618 * present at all, PM_NOT_PRESENT is returned. This allows determining
619 * precisely which pages are mapped (or in swap) and comparing mapped
620 * pages between processes.
621 *
622 * Efficient users of this interface will use /proc/pid/maps to
623 * determine which areas of memory are actually mapped and llseek to
624 * skip over unmapped regions.
625 */
626static ssize_t pagemap_read(struct file *file, char __user *buf,
627 size_t count, loff_t *ppos)
628{
629 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
630 struct page **pages, *page;
631 unsigned long uaddr, uend;
632 struct mm_struct *mm;
633 struct pagemapread pm;
634 int pagecount;
635 int ret = -ESRCH;
636
637 if (!task)
638 goto out;
639
640 ret = -EACCES;
641 if (!ptrace_may_attach(task))
642 goto out;
643
644 ret = -EINVAL;
645 /* file position must be aligned */
646 if (*ppos % PM_ENTRY_BYTES)
647 goto out;
648
649 ret = 0;
650 mm = get_task_mm(task);
651 if (!mm)
652 goto out;
653
654 ret = -ENOMEM;
655 uaddr = (unsigned long)buf & PAGE_MASK;
656 uend = (unsigned long)(buf + count);
657 pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE;
658 pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL);
659 if (!pages)
660 goto out_task;
661
662 down_read(&current->mm->mmap_sem);
663 ret = get_user_pages(current, current->mm, uaddr, pagecount,
664 1, 0, pages, NULL);
665 up_read(&current->mm->mmap_sem);
666
667 if (ret < 0)
668 goto out_free;
669
670 pm.out = buf;
671 pm.end = buf + count;
672
673 if (!ptrace_may_attach(task)) {
674 ret = -EIO;
675 } else {
676 unsigned long src = *ppos;
677 unsigned long svpfn = src / PM_ENTRY_BYTES;
678 unsigned long start_vaddr = svpfn << PAGE_SHIFT;
679 unsigned long end_vaddr = TASK_SIZE_OF(task);
680
681 /* watch out for wraparound */
682 if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT)
683 start_vaddr = end_vaddr;
684
685 /*
686 * The odds are that this will stop walking way
687 * before end_vaddr, because the length of the
688 * user buffer is tracked in "pm", and the walk
689 * will stop when we hit the end of the buffer.
690 */
691 ret = walk_page_range(mm, start_vaddr, end_vaddr,
692 &pagemap_walk, &pm);
693 if (ret == PM_END_OF_BUFFER)
694 ret = 0;
695 /* don't need mmap_sem for these, but this looks cleaner */
696 *ppos += pm.out - buf;
697 if (!ret)
698 ret = pm.out - buf;
699 }
700
701 for (; pagecount; pagecount--) {
702 page = pages[pagecount-1];
703 if (!PageReserved(page))
704 SetPageDirty(page);
705 page_cache_release(page);
706 }
707 mmput(mm);
708out_free:
709 kfree(pages);
710out_task:
711 put_task_struct(task);
712out:
713 return ret;
714}
715
716const struct file_operations proc_pagemap_operations = {
717 .llseek = mem_lseek, /* borrow this */
718 .read = pagemap_read,
719};
720
522#ifdef CONFIG_NUMA 721#ifdef CONFIG_NUMA
523extern int show_numa_map(struct seq_file *m, void *v); 722extern int show_numa_map(struct seq_file *m, void *v);
524 723
@@ -552,4 +751,3 @@ const struct file_operations proc_numa_maps_operations = {
552 .release = seq_release_private, 751 .release = seq_release_private,
553}; 752};
554#endif 753#endif
555