diff options
-rw-r--r-- | fs/proc/base.c | 4 | ||||
-rw-r--r-- | fs/proc/internal.h | 2 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 200 |
3 files changed, 204 insertions, 2 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 1bd646d3fe9a..9004db04efa0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -783,7 +783,7 @@ out_no_task: | |||
783 | } | 783 | } |
784 | #endif | 784 | #endif |
785 | 785 | ||
786 | static loff_t mem_lseek(struct file * file, loff_t offset, int orig) | 786 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) |
787 | { | 787 | { |
788 | switch (orig) { | 788 | switch (orig) { |
789 | case 0: | 789 | case 0: |
@@ -2252,6 +2252,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2252 | #ifdef CONFIG_MMU | 2252 | #ifdef CONFIG_MMU |
2253 | REG("clear_refs", S_IWUSR, clear_refs), | 2253 | REG("clear_refs", S_IWUSR, clear_refs), |
2254 | REG("smaps", S_IRUGO, smaps), | 2254 | REG("smaps", S_IRUGO, smaps), |
2255 | REG("pagemap", S_IRUSR, pagemap), | ||
2255 | #endif | 2256 | #endif |
2256 | #ifdef CONFIG_SECURITY | 2257 | #ifdef CONFIG_SECURITY |
2257 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), | 2258 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), |
@@ -2580,6 +2581,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2580 | #ifdef CONFIG_MMU | 2581 | #ifdef CONFIG_MMU |
2581 | REG("clear_refs", S_IWUSR, clear_refs), | 2582 | REG("clear_refs", S_IWUSR, clear_refs), |
2582 | REG("smaps", S_IRUGO, smaps), | 2583 | REG("smaps", S_IRUGO, smaps), |
2584 | REG("pagemap", S_IRUSR, pagemap), | ||
2583 | #endif | 2585 | #endif |
2584 | #ifdef CONFIG_SECURITY | 2586 | #ifdef CONFIG_SECURITY |
2585 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), | 2587 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index ddfaeec37492..7d57e8069924 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -52,11 +52,13 @@ extern int proc_tid_stat(struct task_struct *, char *); | |||
52 | extern int proc_tgid_stat(struct task_struct *, char *); | 52 | extern int proc_tgid_stat(struct task_struct *, char *); |
53 | extern int proc_pid_status(struct task_struct *, char *); | 53 | extern int proc_pid_status(struct task_struct *, char *); |
54 | extern int proc_pid_statm(struct task_struct *, char *); | 54 | extern int proc_pid_statm(struct task_struct *, char *); |
55 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); | ||
55 | 56 | ||
56 | extern const struct file_operations proc_maps_operations; | 57 | extern const struct file_operations proc_maps_operations; |
57 | extern const struct file_operations proc_numa_maps_operations; | 58 | extern const struct file_operations proc_numa_maps_operations; |
58 | extern const struct file_operations proc_smaps_operations; | 59 | extern const struct file_operations proc_smaps_operations; |
59 | extern const struct file_operations proc_clear_refs_operations; | 60 | extern const struct file_operations proc_clear_refs_operations; |
61 | extern const struct file_operations proc_pagemap_operations; | ||
60 | 62 | ||
61 | void free_proc_entry(struct proc_dir_entry *de); | 63 | void free_proc_entry(struct proc_dir_entry *de); |
62 | 64 | ||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 308fc5451e43..bbd9b145051d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -5,7 +5,10 @@ | |||
5 | #include <linux/highmem.h> | 5 | #include <linux/highmem.h> |
6 | #include <linux/ptrace.h> | 6 | #include <linux/ptrace.h> |
7 | #include <linux/pagemap.h> | 7 | #include <linux/pagemap.h> |
8 | #include <linux/ptrace.h> | ||
8 | #include <linux/mempolicy.h> | 9 | #include <linux/mempolicy.h> |
10 | #include <linux/swap.h> | ||
11 | #include <linux/swapops.h> | ||
9 | 12 | ||
10 | #include <asm/elf.h> | 13 | #include <asm/elf.h> |
11 | #include <asm/uaccess.h> | 14 | #include <asm/uaccess.h> |
@@ -519,6 +522,202 @@ const struct file_operations proc_clear_refs_operations = { | |||
519 | .write = clear_refs_write, | 522 | .write = clear_refs_write, |
520 | }; | 523 | }; |
521 | 524 | ||
525 | struct pagemapread { | ||
526 | char __user *out, *end; | ||
527 | }; | ||
528 | |||
/* Size in bytes of one pagemap entry. */
#define PM_ENTRY_BYTES      sizeof(u64)
/*
 * The top PM_RESERVED_BITS bits of an entry carry a tag for "special"
 * (non-pfn) values.  All arithmetic is done unsigned so that building
 * the mask does not shift into the sign bit.
 */
#define PM_RESERVED_BITS    3
#define PM_RESERVED_OFFSET  (64 - PM_RESERVED_BITS)
#define PM_RESERVED_MASK    (((1ULL << PM_RESERVED_BITS) - 1) << PM_RESERVED_OFFSET)
/*
 * Place tag @nr in the reserved bits.  The tag is masked (AND), not
 * ORed with the mask: ORing would set every reserved bit and make all
 * special values identical.
 */
#define PM_SPECIAL(nr)      (((unsigned long long)(nr) << PM_RESERVED_OFFSET) & PM_RESERVED_MASK)
#define PM_NOT_PRESENT      PM_SPECIAL(1ULL)
#define PM_SWAP             PM_SPECIAL(2ULL)
/* Positive return code used to stop the walk once the buffer is full. */
#define PM_END_OF_BUFFER    1
537 | |||
538 | static int add_to_pagemap(unsigned long addr, u64 pfn, | ||
539 | struct pagemapread *pm) | ||
540 | { | ||
541 | /* | ||
542 | * Make sure there's room in the buffer for an | ||
543 | * entire entry. Otherwise, only copy part of | ||
544 | * the pfn. | ||
545 | */ | ||
546 | if (pm->out + PM_ENTRY_BYTES >= pm->end) { | ||
547 | if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) | ||
548 | return -EFAULT; | ||
549 | pm->out = pm->end; | ||
550 | return PM_END_OF_BUFFER; | ||
551 | } | ||
552 | |||
553 | if (put_user(pfn, pm->out)) | ||
554 | return -EFAULT; | ||
555 | pm->out += PM_ENTRY_BYTES; | ||
556 | return 0; | ||
557 | } | ||
558 | |||
559 | static int pagemap_pte_hole(unsigned long start, unsigned long end, | ||
560 | void *private) | ||
561 | { | ||
562 | struct pagemapread *pm = private; | ||
563 | unsigned long addr; | ||
564 | int err = 0; | ||
565 | for (addr = start; addr < end; addr += PAGE_SIZE) { | ||
566 | err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); | ||
567 | if (err) | ||
568 | break; | ||
569 | } | ||
570 | return err; | ||
571 | } | ||
572 | |||
573 | u64 swap_pte_to_pagemap_entry(pte_t pte) | ||
574 | { | ||
575 | swp_entry_t e = pte_to_swp_entry(pte); | ||
576 | return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); | ||
577 | } | ||
578 | |||
579 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | ||
580 | void *private) | ||
581 | { | ||
582 | struct pagemapread *pm = private; | ||
583 | pte_t *pte; | ||
584 | int err = 0; | ||
585 | |||
586 | for (; addr != end; addr += PAGE_SIZE) { | ||
587 | u64 pfn = PM_NOT_PRESENT; | ||
588 | pte = pte_offset_map(pmd, addr); | ||
589 | if (is_swap_pte(*pte)) | ||
590 | pfn = swap_pte_to_pagemap_entry(*pte); | ||
591 | else if (pte_present(*pte)) | ||
592 | pfn = pte_pfn(*pte); | ||
593 | /* unmap so we're not in atomic when we copy to userspace */ | ||
594 | pte_unmap(pte); | ||
595 | err = add_to_pagemap(addr, pfn, pm); | ||
596 | if (err) | ||
597 | return err; | ||
598 | } | ||
599 | |||
600 | cond_resched(); | ||
601 | |||
602 | return err; | ||
603 | } | ||
604 | |||
605 | static struct mm_walk pagemap_walk = { | ||
606 | .pmd_entry = pagemap_pte_range, | ||
607 | .pte_hole = pagemap_pte_hole | ||
608 | }; | ||
609 | |||
610 | /* | ||
611 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | ||
612 | * | ||
613 | * For each page in the address space, this file contains one 64-bit | ||
614 | * entry representing the corresponding physical page frame number | ||
615 | * (PFN) if the page is present. If there is a swap entry for the | ||
616 | * physical page, then an encoding of the swap file number and the | ||
617 | * page's offset into the swap file are returned. If no page is | ||
618 | * present at all, PM_NOT_PRESENT is returned. This allows determining | ||
619 | * precisely which pages are mapped (or in swap) and comparing mapped | ||
620 | * pages between processes. | ||
621 | * | ||
622 | * Efficient users of this interface will use /proc/pid/maps to | ||
623 | * determine which areas of memory are actually mapped and llseek to | ||
624 | * skip over unmapped regions. | ||
625 | */ | ||
626 | static ssize_t pagemap_read(struct file *file, char __user *buf, | ||
627 | size_t count, loff_t *ppos) | ||
628 | { | ||
629 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | ||
630 | struct page **pages, *page; | ||
631 | unsigned long uaddr, uend; | ||
632 | struct mm_struct *mm; | ||
633 | struct pagemapread pm; | ||
634 | int pagecount; | ||
635 | int ret = -ESRCH; | ||
636 | |||
637 | if (!task) | ||
638 | goto out; | ||
639 | |||
640 | ret = -EACCES; | ||
641 | if (!ptrace_may_attach(task)) | ||
642 | goto out; | ||
643 | |||
644 | ret = -EINVAL; | ||
645 | /* file position must be aligned */ | ||
646 | if (*ppos % PM_ENTRY_BYTES) | ||
647 | goto out; | ||
648 | |||
649 | ret = 0; | ||
650 | mm = get_task_mm(task); | ||
651 | if (!mm) | ||
652 | goto out; | ||
653 | |||
654 | ret = -ENOMEM; | ||
655 | uaddr = (unsigned long)buf & PAGE_MASK; | ||
656 | uend = (unsigned long)(buf + count); | ||
657 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; | ||
658 | pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); | ||
659 | if (!pages) | ||
660 | goto out_task; | ||
661 | |||
662 | down_read(¤t->mm->mmap_sem); | ||
663 | ret = get_user_pages(current, current->mm, uaddr, pagecount, | ||
664 | 1, 0, pages, NULL); | ||
665 | up_read(¤t->mm->mmap_sem); | ||
666 | |||
667 | if (ret < 0) | ||
668 | goto out_free; | ||
669 | |||
670 | pm.out = buf; | ||
671 | pm.end = buf + count; | ||
672 | |||
673 | if (!ptrace_may_attach(task)) { | ||
674 | ret = -EIO; | ||
675 | } else { | ||
676 | unsigned long src = *ppos; | ||
677 | unsigned long svpfn = src / PM_ENTRY_BYTES; | ||
678 | unsigned long start_vaddr = svpfn << PAGE_SHIFT; | ||
679 | unsigned long end_vaddr = TASK_SIZE_OF(task); | ||
680 | |||
681 | /* watch out for wraparound */ | ||
682 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) | ||
683 | start_vaddr = end_vaddr; | ||
684 | |||
685 | /* | ||
686 | * The odds are that this will stop walking way | ||
687 | * before end_vaddr, because the length of the | ||
688 | * user buffer is tracked in "pm", and the walk | ||
689 | * will stop when we hit the end of the buffer. | ||
690 | */ | ||
691 | ret = walk_page_range(mm, start_vaddr, end_vaddr, | ||
692 | &pagemap_walk, &pm); | ||
693 | if (ret == PM_END_OF_BUFFER) | ||
694 | ret = 0; | ||
695 | /* don't need mmap_sem for these, but this looks cleaner */ | ||
696 | *ppos += pm.out - buf; | ||
697 | if (!ret) | ||
698 | ret = pm.out - buf; | ||
699 | } | ||
700 | |||
701 | for (; pagecount; pagecount--) { | ||
702 | page = pages[pagecount-1]; | ||
703 | if (!PageReserved(page)) | ||
704 | SetPageDirty(page); | ||
705 | page_cache_release(page); | ||
706 | } | ||
707 | mmput(mm); | ||
708 | out_free: | ||
709 | kfree(pages); | ||
710 | out_task: | ||
711 | put_task_struct(task); | ||
712 | out: | ||
713 | return ret; | ||
714 | } | ||
715 | |||
716 | const struct file_operations proc_pagemap_operations = { | ||
717 | .llseek = mem_lseek, /* borrow this */ | ||
718 | .read = pagemap_read, | ||
719 | }; | ||
720 | |||
522 | #ifdef CONFIG_NUMA | 721 | #ifdef CONFIG_NUMA |
523 | extern int show_numa_map(struct seq_file *m, void *v); | 722 | extern int show_numa_map(struct seq_file *m, void *v); |
524 | 723 | ||
@@ -552,4 +751,3 @@ const struct file_operations proc_numa_maps_operations = { | |||
552 | .release = seq_release_private, | 751 | .release = seq_release_private, |
553 | }; | 752 | }; |
554 | #endif | 753 | #endif |
555 | |||