diff options
| -rw-r--r-- | fs/proc/base.c | 4 | ||||
| -rw-r--r-- | fs/proc/internal.h | 2 | ||||
| -rw-r--r-- | fs/proc/task_mmu.c | 200 |
3 files changed, 204 insertions, 2 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c index 1bd646d3fe9a..9004db04efa0 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -783,7 +783,7 @@ out_no_task: | |||
| 783 | } | 783 | } |
| 784 | #endif | 784 | #endif |
| 785 | 785 | ||
| 786 | static loff_t mem_lseek(struct file * file, loff_t offset, int orig) | 786 | loff_t mem_lseek(struct file *file, loff_t offset, int orig) |
| 787 | { | 787 | { |
| 788 | switch (orig) { | 788 | switch (orig) { |
| 789 | case 0: | 789 | case 0: |
| @@ -2252,6 +2252,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
| 2252 | #ifdef CONFIG_MMU | 2252 | #ifdef CONFIG_MMU |
| 2253 | REG("clear_refs", S_IWUSR, clear_refs), | 2253 | REG("clear_refs", S_IWUSR, clear_refs), |
| 2254 | REG("smaps", S_IRUGO, smaps), | 2254 | REG("smaps", S_IRUGO, smaps), |
| 2255 | REG("pagemap", S_IRUSR, pagemap), | ||
| 2255 | #endif | 2256 | #endif |
| 2256 | #ifdef CONFIG_SECURITY | 2257 | #ifdef CONFIG_SECURITY |
| 2257 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), | 2258 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), |
| @@ -2580,6 +2581,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
| 2580 | #ifdef CONFIG_MMU | 2581 | #ifdef CONFIG_MMU |
| 2581 | REG("clear_refs", S_IWUSR, clear_refs), | 2582 | REG("clear_refs", S_IWUSR, clear_refs), |
| 2582 | REG("smaps", S_IRUGO, smaps), | 2583 | REG("smaps", S_IRUGO, smaps), |
| 2584 | REG("pagemap", S_IRUSR, pagemap), | ||
| 2583 | #endif | 2585 | #endif |
| 2584 | #ifdef CONFIG_SECURITY | 2586 | #ifdef CONFIG_SECURITY |
| 2585 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), | 2587 | DIR("attr", S_IRUGO|S_IXUGO, attr_dir), |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index ddfaeec37492..7d57e8069924 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
| @@ -52,11 +52,13 @@ extern int proc_tid_stat(struct task_struct *, char *); | |||
| 52 | extern int proc_tgid_stat(struct task_struct *, char *); | 52 | extern int proc_tgid_stat(struct task_struct *, char *); |
| 53 | extern int proc_pid_status(struct task_struct *, char *); | 53 | extern int proc_pid_status(struct task_struct *, char *); |
| 54 | extern int proc_pid_statm(struct task_struct *, char *); | 54 | extern int proc_pid_statm(struct task_struct *, char *); |
| 55 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); | ||
| 55 | 56 | ||
| 56 | extern const struct file_operations proc_maps_operations; | 57 | extern const struct file_operations proc_maps_operations; |
| 57 | extern const struct file_operations proc_numa_maps_operations; | 58 | extern const struct file_operations proc_numa_maps_operations; |
| 58 | extern const struct file_operations proc_smaps_operations; | 59 | extern const struct file_operations proc_smaps_operations; |
| 59 | extern const struct file_operations proc_clear_refs_operations; | 60 | extern const struct file_operations proc_clear_refs_operations; |
| 61 | extern const struct file_operations proc_pagemap_operations; | ||
| 60 | 62 | ||
| 61 | void free_proc_entry(struct proc_dir_entry *de); | 63 | void free_proc_entry(struct proc_dir_entry *de); |
| 62 | 64 | ||
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 308fc5451e43..bbd9b145051d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
| @@ -5,7 +5,10 @@ | |||
| 5 | #include <linux/highmem.h> | 5 | #include <linux/highmem.h> |
| 6 | #include <linux/ptrace.h> | 6 | #include <linux/ptrace.h> |
| 7 | #include <linux/pagemap.h> | 7 | #include <linux/pagemap.h> |
| 8 | #include <linux/ptrace.h> | ||
| 8 | #include <linux/mempolicy.h> | 9 | #include <linux/mempolicy.h> |
| 10 | #include <linux/swap.h> | ||
| 11 | #include <linux/swapops.h> | ||
| 9 | 12 | ||
| 10 | #include <asm/elf.h> | 13 | #include <asm/elf.h> |
| 11 | #include <asm/uaccess.h> | 14 | #include <asm/uaccess.h> |
| @@ -519,6 +522,202 @@ const struct file_operations proc_clear_refs_operations = { | |||
| 519 | .write = clear_refs_write, | 522 | .write = clear_refs_write, |
| 520 | }; | 523 | }; |
| 521 | 524 | ||
| 525 | struct pagemapread { | ||
| 526 | char __user *out, *end; | ||
| 527 | }; | ||
| 528 | |||
/*
 * Layout of a pagemap entry: the top PM_RESERVED_BITS bits of the
 * 64-bit word carry a "special" code, the rest carries the payload.
 *
 * The arithmetic is done in unsigned 64-bit so that shifting into
 * bit 63 is well defined, and PM_SPECIAL() masks (rather than ORs)
 * with PM_RESERVED_MASK so that distinct codes yield distinct values;
 * OR-ing with the full mask would make every special code identical.
 */
#define PM_ENTRY_BYTES		sizeof(u64)
#define PM_RESERVED_BITS	3
#define PM_RESERVED_OFFSET	(64 - PM_RESERVED_BITS)
#define PM_RESERVED_MASK	(((1ULL << PM_RESERVED_BITS) - 1) << PM_RESERVED_OFFSET)
#define PM_SPECIAL(nr)		(((unsigned long long)(nr) << PM_RESERVED_OFFSET) & PM_RESERVED_MASK)
#define PM_NOT_PRESENT		PM_SPECIAL(1LL)
#define PM_SWAP			PM_SPECIAL(2LL)
/* Positive, non-error return code: the user buffer has been filled. */
#define PM_END_OF_BUFFER	1
| 537 | |||
| 538 | static int add_to_pagemap(unsigned long addr, u64 pfn, | ||
| 539 | struct pagemapread *pm) | ||
| 540 | { | ||
| 541 | /* | ||
| 542 | * Make sure there's room in the buffer for an | ||
| 543 | * entire entry. Otherwise, only copy part of | ||
| 544 | * the pfn. | ||
| 545 | */ | ||
| 546 | if (pm->out + PM_ENTRY_BYTES >= pm->end) { | ||
| 547 | if (copy_to_user(pm->out, &pfn, pm->end - pm->out)) | ||
| 548 | return -EFAULT; | ||
| 549 | pm->out = pm->end; | ||
| 550 | return PM_END_OF_BUFFER; | ||
| 551 | } | ||
| 552 | |||
| 553 | if (put_user(pfn, pm->out)) | ||
| 554 | return -EFAULT; | ||
| 555 | pm->out += PM_ENTRY_BYTES; | ||
| 556 | return 0; | ||
| 557 | } | ||
| 558 | |||
| 559 | static int pagemap_pte_hole(unsigned long start, unsigned long end, | ||
| 560 | void *private) | ||
| 561 | { | ||
| 562 | struct pagemapread *pm = private; | ||
| 563 | unsigned long addr; | ||
| 564 | int err = 0; | ||
| 565 | for (addr = start; addr < end; addr += PAGE_SIZE) { | ||
| 566 | err = add_to_pagemap(addr, PM_NOT_PRESENT, pm); | ||
| 567 | if (err) | ||
| 568 | break; | ||
| 569 | } | ||
| 570 | return err; | ||
| 571 | } | ||
| 572 | |||
| 573 | u64 swap_pte_to_pagemap_entry(pte_t pte) | ||
| 574 | { | ||
| 575 | swp_entry_t e = pte_to_swp_entry(pte); | ||
| 576 | return PM_SWAP | swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); | ||
| 577 | } | ||
| 578 | |||
| 579 | static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | ||
| 580 | void *private) | ||
| 581 | { | ||
| 582 | struct pagemapread *pm = private; | ||
| 583 | pte_t *pte; | ||
| 584 | int err = 0; | ||
| 585 | |||
| 586 | for (; addr != end; addr += PAGE_SIZE) { | ||
| 587 | u64 pfn = PM_NOT_PRESENT; | ||
| 588 | pte = pte_offset_map(pmd, addr); | ||
| 589 | if (is_swap_pte(*pte)) | ||
| 590 | pfn = swap_pte_to_pagemap_entry(*pte); | ||
| 591 | else if (pte_present(*pte)) | ||
| 592 | pfn = pte_pfn(*pte); | ||
| 593 | /* unmap so we're not in atomic when we copy to userspace */ | ||
| 594 | pte_unmap(pte); | ||
| 595 | err = add_to_pagemap(addr, pfn, pm); | ||
| 596 | if (err) | ||
| 597 | return err; | ||
| 598 | } | ||
| 599 | |||
| 600 | cond_resched(); | ||
| 601 | |||
| 602 | return err; | ||
| 603 | } | ||
| 604 | |||
| 605 | static struct mm_walk pagemap_walk = { | ||
| 606 | .pmd_entry = pagemap_pte_range, | ||
| 607 | .pte_hole = pagemap_pte_hole | ||
| 608 | }; | ||
| 609 | |||
| 610 | /* | ||
| 611 | * /proc/pid/pagemap - an array mapping virtual pages to pfns | ||
| 612 | * | ||
| 613 | * For each page in the address space, this file contains one 64-bit | ||
| 614 | * entry representing the corresponding physical page frame number | ||
| 615 | * (PFN) if the page is present. If there is a swap entry for the | ||
| 616 | * physical page, then an encoding of the swap file number and the | ||
| 617 | * page's offset into the swap file are returned. If no page is | ||
| 618 | * present at all, PM_NOT_PRESENT is returned. This allows determining | ||
| 619 | * precisely which pages are mapped (or in swap) and comparing mapped | ||
| 620 | * pages between processes. | ||
| 621 | * | ||
| 622 | * Efficient users of this interface will use /proc/pid/maps to | ||
| 623 | * determine which areas of memory are actually mapped and llseek to | ||
| 624 | * skip over unmapped regions. | ||
| 625 | */ | ||
| 626 | static ssize_t pagemap_read(struct file *file, char __user *buf, | ||
| 627 | size_t count, loff_t *ppos) | ||
| 628 | { | ||
| 629 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | ||
| 630 | struct page **pages, *page; | ||
| 631 | unsigned long uaddr, uend; | ||
| 632 | struct mm_struct *mm; | ||
| 633 | struct pagemapread pm; | ||
| 634 | int pagecount; | ||
| 635 | int ret = -ESRCH; | ||
| 636 | |||
| 637 | if (!task) | ||
| 638 | goto out; | ||
| 639 | |||
| 640 | ret = -EACCES; | ||
| 641 | if (!ptrace_may_attach(task)) | ||
| 642 | goto out; | ||
| 643 | |||
| 644 | ret = -EINVAL; | ||
| 645 | /* file position must be aligned */ | ||
| 646 | if (*ppos % PM_ENTRY_BYTES) | ||
| 647 | goto out; | ||
| 648 | |||
| 649 | ret = 0; | ||
| 650 | mm = get_task_mm(task); | ||
| 651 | if (!mm) | ||
| 652 | goto out; | ||
| 653 | |||
| 654 | ret = -ENOMEM; | ||
| 655 | uaddr = (unsigned long)buf & PAGE_MASK; | ||
| 656 | uend = (unsigned long)(buf + count); | ||
| 657 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; | ||
| 658 | pages = kmalloc(pagecount * sizeof(struct page *), GFP_KERNEL); | ||
| 659 | if (!pages) | ||
| 660 | goto out_task; | ||
| 661 | |||
| 662 | down_read(¤t->mm->mmap_sem); | ||
| 663 | ret = get_user_pages(current, current->mm, uaddr, pagecount, | ||
| 664 | 1, 0, pages, NULL); | ||
| 665 | up_read(¤t->mm->mmap_sem); | ||
| 666 | |||
| 667 | if (ret < 0) | ||
| 668 | goto out_free; | ||
| 669 | |||
| 670 | pm.out = buf; | ||
| 671 | pm.end = buf + count; | ||
| 672 | |||
| 673 | if (!ptrace_may_attach(task)) { | ||
| 674 | ret = -EIO; | ||
| 675 | } else { | ||
| 676 | unsigned long src = *ppos; | ||
| 677 | unsigned long svpfn = src / PM_ENTRY_BYTES; | ||
| 678 | unsigned long start_vaddr = svpfn << PAGE_SHIFT; | ||
| 679 | unsigned long end_vaddr = TASK_SIZE_OF(task); | ||
| 680 | |||
| 681 | /* watch out for wraparound */ | ||
| 682 | if (svpfn > TASK_SIZE_OF(task) >> PAGE_SHIFT) | ||
| 683 | start_vaddr = end_vaddr; | ||
| 684 | |||
| 685 | /* | ||
| 686 | * The odds are that this will stop walking way | ||
| 687 | * before end_vaddr, because the length of the | ||
| 688 | * user buffer is tracked in "pm", and the walk | ||
| 689 | * will stop when we hit the end of the buffer. | ||
| 690 | */ | ||
| 691 | ret = walk_page_range(mm, start_vaddr, end_vaddr, | ||
| 692 | &pagemap_walk, &pm); | ||
| 693 | if (ret == PM_END_OF_BUFFER) | ||
| 694 | ret = 0; | ||
| 695 | /* don't need mmap_sem for these, but this looks cleaner */ | ||
| 696 | *ppos += pm.out - buf; | ||
| 697 | if (!ret) | ||
| 698 | ret = pm.out - buf; | ||
| 699 | } | ||
| 700 | |||
| 701 | for (; pagecount; pagecount--) { | ||
| 702 | page = pages[pagecount-1]; | ||
| 703 | if (!PageReserved(page)) | ||
| 704 | SetPageDirty(page); | ||
| 705 | page_cache_release(page); | ||
| 706 | } | ||
| 707 | mmput(mm); | ||
| 708 | out_free: | ||
| 709 | kfree(pages); | ||
| 710 | out_task: | ||
| 711 | put_task_struct(task); | ||
| 712 | out: | ||
| 713 | return ret; | ||
| 714 | } | ||
| 715 | |||
| 716 | const struct file_operations proc_pagemap_operations = { | ||
| 717 | .llseek = mem_lseek, /* borrow this */ | ||
| 718 | .read = pagemap_read, | ||
| 719 | }; | ||
| 720 | |||
| 522 | #ifdef CONFIG_NUMA | 721 | #ifdef CONFIG_NUMA |
| 523 | extern int show_numa_map(struct seq_file *m, void *v); | 722 | extern int show_numa_map(struct seq_file *m, void *v); |
| 524 | 723 | ||
| @@ -552,4 +751,3 @@ const struct file_operations proc_numa_maps_operations = { | |||
| 552 | .release = seq_release_private, | 751 | .release = seq_release_private, |
| 553 | }; | 752 | }; |
| 554 | #endif | 753 | #endif |
| 555 | |||
