diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2010-04-01 20:11:29 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-04-04 15:06:02 -0400 |
commit | d82ef020cf31504c816803b1def94eb5ff173363 (patch) | |
tree | 1aac1f2260b602d28804a809783076d68a06bb33 /fs | |
parent | 5e11611a5d22252f3f9c169a3c9377eac0c32033 (diff) |
proc: pagemap: Hold mmap_sem during page walk
In the initial design, walk_page_range() was designed just for walking the page
table and it didn't require mmap_sem. Now, find_vma() etc. are used
in walk_page_range() and we need mmap_sem around it.
This patch adds mmap_sem around walk_page_range().
Because /proc/<pid>/pagemap's callback routine uses put_user(), we have
to get rid of it to do a sane fix.
Changelog: 2010/Apr/2
- fixed start_vaddr and end overflow
Changelog: 2010/Apr/1
- fixed start_vaddr calculation
- removed unnecessary cast.
- removed unnecessary change in smaps.
- use GFP_TEMPORARY instead of GFP_KERNEL
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: San Mehat <san@google.com>
Cc: Brian Swetland <swetland@google.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
[ Fixed kmalloc failure return code as per Matt ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/proc/task_mmu.c | 87 |
1 files changed, 38 insertions, 49 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 183f8ff5f400..096273984c3b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -406,6 +406,7 @@ static int show_smap(struct seq_file *m, void *v) | |||
406 | 406 | ||
407 | memset(&mss, 0, sizeof mss); | 407 | memset(&mss, 0, sizeof mss); |
408 | mss.vma = vma; | 408 | mss.vma = vma; |
409 | /* mmap_sem is held in m_start */ | ||
409 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) | 410 | if (vma->vm_mm && !is_vm_hugetlb_page(vma)) |
410 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); | 411 | walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); |
411 | 412 | ||
@@ -552,7 +553,8 @@ const struct file_operations proc_clear_refs_operations = { | |||
552 | }; | 553 | }; |
553 | 554 | ||
554 | struct pagemapread { | 555 | struct pagemapread { |
555 | u64 __user *out, *end; | 556 | int pos, len; |
557 | u64 *buffer; | ||
556 | }; | 558 | }; |
557 | 559 | ||
558 | #define PM_ENTRY_BYTES sizeof(u64) | 560 | #define PM_ENTRY_BYTES sizeof(u64) |
@@ -575,10 +577,8 @@ struct pagemapread { | |||
575 | static int add_to_pagemap(unsigned long addr, u64 pfn, | 577 | static int add_to_pagemap(unsigned long addr, u64 pfn, |
576 | struct pagemapread *pm) | 578 | struct pagemapread *pm) |
577 | { | 579 | { |
578 | if (put_user(pfn, pm->out)) | 580 | pm->buffer[pm->pos++] = pfn; |
579 | return -EFAULT; | 581 | if (pm->pos >= pm->len) |
580 | pm->out++; | ||
581 | if (pm->out >= pm->end) | ||
582 | return PM_END_OF_BUFFER; | 582 | return PM_END_OF_BUFFER; |
583 | return 0; | 583 | return 0; |
584 | } | 584 | } |
@@ -720,21 +720,20 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long addr, | |||
720 | * determine which areas of memory are actually mapped and llseek to | 720 | * determine which areas of memory are actually mapped and llseek to |
721 | * skip over unmapped regions. | 721 | * skip over unmapped regions. |
722 | */ | 722 | */ |
723 | #define PAGEMAP_WALK_SIZE (PMD_SIZE) | ||
723 | static ssize_t pagemap_read(struct file *file, char __user *buf, | 724 | static ssize_t pagemap_read(struct file *file, char __user *buf, |
724 | size_t count, loff_t *ppos) | 725 | size_t count, loff_t *ppos) |
725 | { | 726 | { |
726 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); | 727 | struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); |
727 | struct page **pages, *page; | ||
728 | unsigned long uaddr, uend; | ||
729 | struct mm_struct *mm; | 728 | struct mm_struct *mm; |
730 | struct pagemapread pm; | 729 | struct pagemapread pm; |
731 | int pagecount; | ||
732 | int ret = -ESRCH; | 730 | int ret = -ESRCH; |
733 | struct mm_walk pagemap_walk = {}; | 731 | struct mm_walk pagemap_walk = {}; |
734 | unsigned long src; | 732 | unsigned long src; |
735 | unsigned long svpfn; | 733 | unsigned long svpfn; |
736 | unsigned long start_vaddr; | 734 | unsigned long start_vaddr; |
737 | unsigned long end_vaddr; | 735 | unsigned long end_vaddr; |
736 | int copied = 0; | ||
738 | 737 | ||
739 | if (!task) | 738 | if (!task) |
740 | goto out; | 739 | goto out; |
@@ -757,35 +756,12 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
757 | if (!mm) | 756 | if (!mm) |
758 | goto out_task; | 757 | goto out_task; |
759 | 758 | ||
760 | 759 | pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); | |
761 | uaddr = (unsigned long)buf & PAGE_MASK; | 760 | pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); |
762 | uend = (unsigned long)(buf + count); | ||
763 | pagecount = (PAGE_ALIGN(uend) - uaddr) / PAGE_SIZE; | ||
764 | ret = 0; | ||
765 | if (pagecount == 0) | ||
766 | goto out_mm; | ||
767 | pages = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); | ||
768 | ret = -ENOMEM; | 761 | ret = -ENOMEM; |
769 | if (!pages) | 762 | if (!pm.buffer) |
770 | goto out_mm; | 763 | goto out_mm; |
771 | 764 | ||
772 | down_read(¤t->mm->mmap_sem); | ||
773 | ret = get_user_pages(current, current->mm, uaddr, pagecount, | ||
774 | 1, 0, pages, NULL); | ||
775 | up_read(¤t->mm->mmap_sem); | ||
776 | |||
777 | if (ret < 0) | ||
778 | goto out_free; | ||
779 | |||
780 | if (ret != pagecount) { | ||
781 | pagecount = ret; | ||
782 | ret = -EFAULT; | ||
783 | goto out_pages; | ||
784 | } | ||
785 | |||
786 | pm.out = (u64 __user *)buf; | ||
787 | pm.end = (u64 __user *)(buf + count); | ||
788 | |||
789 | pagemap_walk.pmd_entry = pagemap_pte_range; | 765 | pagemap_walk.pmd_entry = pagemap_pte_range; |
790 | pagemap_walk.pte_hole = pagemap_pte_hole; | 766 | pagemap_walk.pte_hole = pagemap_pte_hole; |
791 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; | 767 | pagemap_walk.hugetlb_entry = pagemap_hugetlb_range; |
@@ -807,23 +783,36 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, | |||
807 | * user buffer is tracked in "pm", and the walk | 783 | * user buffer is tracked in "pm", and the walk |
808 | * will stop when we hit the end of the buffer. | 784 | * will stop when we hit the end of the buffer. |
809 | */ | 785 | */ |
810 | ret = walk_page_range(start_vaddr, end_vaddr, &pagemap_walk); | 786 | ret = 0; |
811 | if (ret == PM_END_OF_BUFFER) | 787 | while (count && (start_vaddr < end_vaddr)) { |
812 | ret = 0; | 788 | int len; |
813 | /* don't need mmap_sem for these, but this looks cleaner */ | 789 | unsigned long end; |
814 | *ppos += (char __user *)pm.out - buf; | 790 | |
815 | if (!ret) | 791 | pm.pos = 0; |
816 | ret = (char __user *)pm.out - buf; | 792 | end = start_vaddr + PAGEMAP_WALK_SIZE; |
817 | 793 | /* overflow ? */ | |
818 | out_pages: | 794 | if (end < start_vaddr || end > end_vaddr) |
819 | for (; pagecount; pagecount--) { | 795 | end = end_vaddr; |
820 | page = pages[pagecount-1]; | 796 | down_read(&mm->mmap_sem); |
821 | if (!PageReserved(page)) | 797 | ret = walk_page_range(start_vaddr, end, &pagemap_walk); |
822 | SetPageDirty(page); | 798 | up_read(&mm->mmap_sem); |
823 | page_cache_release(page); | 799 | start_vaddr = end; |
800 | |||
801 | len = min(count, PM_ENTRY_BYTES * pm.pos); | ||
802 | if (copy_to_user(buf, pm.buffer, len) < 0) { | ||
803 | ret = -EFAULT; | ||
804 | goto out_free; | ||
805 | } | ||
806 | copied += len; | ||
807 | buf += len; | ||
808 | count -= len; | ||
824 | } | 809 | } |
810 | *ppos += copied; | ||
811 | if (!ret || ret == PM_END_OF_BUFFER) | ||
812 | ret = copied; | ||
813 | |||
825 | out_free: | 814 | out_free: |
826 | kfree(pages); | 815 | kfree(pm.buffer); |
827 | out_mm: | 816 | out_mm: |
828 | mmput(mm); | 817 | mmput(mm); |
829 | out_task: | 818 | out_task: |