Diffstat (limited to 'mm/memory.c')
 mm/memory.c | 322 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 222 insertions(+), 100 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 2302d228fe04..1002f473f497 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -51,6 +51,7 @@
 #include <linux/init.h>
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -61,6 +62,8 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
+#include "internal.h"
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -211,7 +214,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  *
  * Must be called with pagetable lock held.
  */
-void free_pgd_range(struct mmu_gather **tlb,
+void free_pgd_range(struct mmu_gather *tlb,
 			unsigned long addr, unsigned long end,
 			unsigned long floor, unsigned long ceiling)
 {
@@ -262,16 +265,16 @@ void free_pgd_range(struct mmu_gather **tlb,
 		return;
 
 	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
+	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
-void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		unsigned long floor, unsigned long ceiling)
 {
 	while (vma) {
@@ -372,7 +375,8 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
  *
  * The calling function must still handle the error.
  */
-void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+static void print_bad_pte(struct vm_area_struct *vma, pte_t pte,
+			  unsigned long vaddr)
 {
 	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
 			"vm_flags = %lx, vaddr = %lx\n",
@@ -649,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	unsigned long next;
 	unsigned long addr = vma->vm_start;
 	unsigned long end = vma->vm_end;
+	int ret;
 
 	/*
 	 * Don't copy ptes where a page fault will fill them correctly.
@@ -664,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
+	/*
+	 * We need to invalidate the secondary MMU mappings only when
+	 * there could be a permission downgrade on the ptes of the
+	 * parent mm. And a permission downgrade will only happen if
+	 * is_cow_mapping() returns true.
+	 */
+	if (is_cow_mapping(vma->vm_flags))
+		mmu_notifier_invalidate_range_start(src_mm, addr, end);
+
+	ret = 0;
 	dst_pgd = pgd_offset(dst_mm, addr);
 	src_pgd = pgd_offset(src_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
-		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
-						vma, addr, next))
-			return -ENOMEM;
+		if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+					    vma, addr, next))) {
+			ret = -ENOMEM;
+			break;
+		}
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
-	return 0;
+
+	if (is_cow_mapping(vma->vm_flags))
+		mmu_notifier_invalidate_range_end(src_mm,
+						  vma->vm_start, end);
+	return ret;
 }
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
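Note: the hunk above is the core of the mmu_notifier change to fork. Any path that may downgrade or remove pte permissions is expected to bracket the update with the two invalidate calls, so secondary MMUs (KVM, GRU and similar users) can drop their cached translations. A minimal sketch of that contract, using only the calls shown in this patch (my_wrprotect_range is a hypothetical caller, not part of the patch):

	/* Sketch only: bracket a hypothetical pte permission downgrade. */
	static void my_wrprotect_range(struct mm_struct *mm,
				       unsigned long start, unsigned long end)
	{
		mmu_notifier_invalidate_range_start(mm, start, end);
		/* ... write-protect or clear the ptes in [start, end) ... */
		mmu_notifier_invalidate_range_end(mm, start, end);
	}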
@@ -878,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 	unsigned long start = start_addr;
 	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	int fullmm = (*tlbp)->fullmm;
+	struct mm_struct *mm = vma->vm_mm;
 
+	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
 	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
 		unsigned long end;
 
@@ -899,9 +922,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		}
 
 		if (unlikely(is_vm_hugetlb_page(vma))) {
-			unmap_hugepage_range(vma, start, end);
-			zap_work -= (end - start) /
-					(HPAGE_SIZE / PAGE_SIZE);
+			/*
+			 * It is undesirable to test vma->vm_file as it
+			 * should be non-null for valid hugetlb area.
+			 * However, vm_file will be NULL in the error
+			 * cleanup path of do_mmap_pgoff. When
+			 * hugetlbfs ->mmap method fails,
+			 * do_mmap_pgoff() nullifies vma->vm_file
+			 * before calling this function to clean up.
+			 * Since no pte has actually been setup, it is
+			 * safe to do nothing in this case.
+			 */
+			if (vma->vm_file) {
+				unmap_hugepage_range(vma, start, end, NULL);
+				zap_work -= (end - start) /
+					pages_per_huge_page(hstate_vma(vma));
+			}
+
 			start = end;
 		} else
 			start = unmap_page_range(*tlbp, vma,
@@ -929,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		}
 	}
 out:
+	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
 
@@ -956,6 +994,29 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 	return end;
 }
 
+/**
+ * zap_vma_ptes - remove ptes mapping the vma
+ * @vma: vm_area_struct holding ptes to be zapped
+ * @address: starting address of pages to zap
+ * @size: number of bytes to zap
+ *
+ * This function only unmaps ptes assigned to VM_PFNMAP vmas.
+ *
+ * The entire address range must be fully contained within the vma.
+ *
+ * Returns 0 if successful.
+ */
+int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+		unsigned long size)
+{
+	if (address < vma->vm_start || address + size > vma->vm_end ||
+			!(vma->vm_flags & VM_PFNMAP))
+		return -1;
+	zap_page_range(vma, address, size, NULL);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zap_vma_ptes);
+
 /*
  * Do a quick page-table lookup for a single page.
  */
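Note: zap_vma_ptes() is exported so that a driver which populates a VM_PFNMAP vma can later revoke those mappings. A rough usage sketch (the mydrv_* name is hypothetical; only zap_vma_ptes() itself comes from this patch):

	/* Sketch only: revoke every pte the driver installed in its PFNMAP vma. */
	static void mydrv_revoke_mapping(struct vm_area_struct *vma)
	{
		if (zap_vma_ptes(vma, vma->vm_start,
				 vma->vm_end - vma->vm_start))
			printk(KERN_WARNING "mydrv: zap_vma_ptes failed\n");
	}

The range must lie entirely inside the vma and the vma must be VM_PFNMAP, otherwise the function returns -1 without touching anything.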
@@ -982,19 +1043,24 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		goto no_page_table;
 
 	pud = pud_offset(pgd, address);
-	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+	if (pud_none(*pud))
 		goto no_page_table;
+	if (pud_huge(*pud)) {
+		BUG_ON(flags & FOLL_GET);
+		page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
+		goto out;
+	}
+	if (unlikely(pud_bad(*pud)))
+		goto no_page_table;
 
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		goto no_page_table;
-
 	if (pmd_huge(*pmd)) {
 		BUG_ON(flags & FOLL_GET);
 		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
 		goto out;
 	}
-
 	if (unlikely(pmd_bad(*pmd)))
 		goto no_page_table;
 
@@ -1058,11 +1124,9 @@ static inline int use_zero_page(struct vm_area_struct *vma)
 	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
 		return 0;
 	/*
-	 * And if we have a fault or a nopfn routine, it's not an
-	 * anonymous region.
+	 * And if we have a fault routine, it's not an anonymous region.
 	 */
-	return !vma->vm_ops ||
-		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+	return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
@@ -1338,6 +1402,11 @@ out:
  *
  * This function should only be called from a vm_ops->fault handler, and
  * in that case the handler should return NULL.
+ *
+ * vma cannot be a COW mapping.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
  */
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
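Note: the added comment documents the constraints on vm_insert_pfn(). A hedged sketch of the intended caller, a ->fault handler of the 2.6.27-era form (the mydev_* names and the base-pfn variable are hypothetical, not part of the patch):

	static unsigned long mydev_base_pfn;	/* hypothetical MMIO aperture */

	/* Sketch only: insert a raw pfn from ->fault, report "no struct page". */
	static int mydev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		unsigned long addr = (unsigned long)vmf->virtual_address;
		int err;

		err = vm_insert_pfn(vma, addr, mydev_base_pfn + vmf->pgoff);
		if (err == -ENOMEM)
			return VM_FAULT_OOM;
		if (err && err != -EBUSY)	/* -EBUSY: raced, pte already set */
			return VM_FAULT_SIGBUS;
		return VM_FAULT_NOPAGE;
	}

This is the replacement pattern for the ->nopfn handlers removed later in this diff.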
@@ -1548,6 +1617,8 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 	unsigned long next;
 	int err;
 
+	BUG_ON(pud_huge(*pud));
+
 	pmd = pmd_alloc(mm, pud, addr);
 	if (!pmd)
 		return -ENOMEM;
@@ -1589,10 +1660,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long end = addr + size;
+	unsigned long start = addr, end = addr + size;
 	int err;
 
 	BUG_ON(addr >= end);
+	mmu_notifier_invalidate_range_start(mm, start, end);
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
@@ -1600,6 +1672,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+	mmu_notifier_invalidate_range_end(mm, start, end);
 	return err;
 }
 EXPORT_SYMBOL_GPL(apply_to_page_range);
@@ -1716,7 +1789,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * not dirty accountable.
 	 */
 	if (PageAnon(old_page)) {
-		if (!TestSetPageLocked(old_page)) {
+		if (trylock_page(old_page)) {
 			reuse = can_share_swap_page(old_page);
 			unlock_page(old_page);
 		}
@@ -1812,7 +1885,7 @@ gotten:
 	 * seen in the presence of one thread doing SMC and another
 	 * thread doing COW.
 	 */
-	ptep_clear_flush(vma, address, page_table);
+	ptep_clear_flush_notify(vma, address, page_table);
 	set_pte_at(mm, address, page_table, entry);
 	update_mmu_cache(vma, address, entry);
 	lru_cache_add_active(new_page);
@@ -2501,59 +2574,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
-
-/*
- * do_no_pfn() tries to create a new page mapping for a page without
- * a struct_page backing it
- *
- * As this is called only for pages that do not currently exist, we
- * do not need to flush old virtual caches or the TLB.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
- *
- * It is expected that the ->nopfn handler always returns the same pfn
- * for a given virtual mapping.
- *
- * Mark this `noinline' to prevent it from bloating the main pagefault code.
- */
-static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
-		     unsigned long address, pte_t *page_table, pmd_t *pmd,
-		     int write_access)
-{
-	spinlock_t *ptl;
-	pte_t entry;
-	unsigned long pfn;
-
-	pte_unmap(page_table);
-	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
-	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
-
-	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
-
-	BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
-
-	if (unlikely(pfn == NOPFN_OOM))
-		return VM_FAULT_OOM;
-	else if (unlikely(pfn == NOPFN_SIGBUS))
-		return VM_FAULT_SIGBUS;
-	else if (unlikely(pfn == NOPFN_REFAULT))
-		return 0;
-
-	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-	/* Only go through if we didn't race with anybody else... */
-	if (pte_none(*page_table)) {
-		entry = pfn_pte(pfn, vma->vm_page_prot);
-		if (write_access)
-			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		set_pte_at(mm, address, page_table, entry);
-	}
-	pte_unmap_unlock(page_table, ptl);
-	return 0;
-}
-
 /*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
@@ -2614,9 +2634,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 		if (likely(vma->vm_ops->fault))
 			return do_linear_fault(mm, vma, address,
 						pte, pmd, write_access, entry);
-		if (unlikely(vma->vm_ops->nopfn))
-			return do_no_pfn(mm, vma, address, pte,
-					 pmd, write_access);
 	}
 	return do_anonymous_page(mm, vma, address,
 				 pte, pmd, write_access);
@@ -2748,16 +2765,26 @@ int make_pages_present(unsigned long addr, unsigned long end)
 
 	vma = find_vma(current->mm, addr);
 	if (!vma)
-		return -1;
+		return -ENOMEM;
 	write = (vma->vm_flags & VM_WRITE) != 0;
 	BUG_ON(addr >= end);
 	BUG_ON(end > vma->vm_end);
 	len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
 	ret = get_user_pages(current, current->mm, addr,
 			len, write, 0, NULL, NULL);
-	if (ret < 0)
+	if (ret < 0) {
+		/*
+		   SUS require strange return value to mlock
+		    - invalid addr generate to ENOMEM.
+		    - out of memory should generate EAGAIN.
+		*/
+		if (ret == -EFAULT)
+			ret = -ENOMEM;
+		else if (ret == -ENOMEM)
+			ret = -EAGAIN;
 		return ret;
-	return ret == len ? 0 : -1;
+	}
+	return ret == len ? 0 : -ENOMEM;
 }
 
 #if !defined(__HAVE_ARCH_GATE_AREA)
@@ -2804,6 +2831,86 @@ int in_gate_area_no_task(unsigned long addr)
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+static resource_size_t follow_phys(struct vm_area_struct *vma,
+			unsigned long address, unsigned int flags,
+			unsigned long *prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	resource_size_t phys_addr = 0;
+	struct mm_struct *mm = vma->vm_mm;
+
+	VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto no_page_table;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto no_page_table;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto no_page_table;
+
+	/* We cannot handle huge page PFN maps. Luckily they don't exist. */
+	if (pmd_huge(*pmd))
+		goto no_page_table;
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (!ptep)
+		goto out;
+
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto unlock;
+	if ((flags & FOLL_WRITE) && !pte_write(pte))
+		goto unlock;
+	phys_addr = pte_pfn(pte);
+	phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
+
+	*prot = pgprot_val(pte_pgprot(pte));
+
+unlock:
+	pte_unmap_unlock(ptep, ptl);
+out:
+	return phys_addr;
+no_page_table:
+	return 0;
+}
+
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+			void *buf, int len, int write)
+{
+	resource_size_t phys_addr;
+	unsigned long prot = 0;
+	void *maddr;
+	int offset = addr & (PAGE_SIZE-1);
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		return -EINVAL;
+
+	phys_addr = follow_phys(vma, addr, write, &prot);
+
+	if (!phys_addr)
+		return -EINVAL;
+
+	maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+	if (write)
+		memcpy_toio(maddr + offset, buf, len);
+	else
+		memcpy_fromio(buf, maddr + offset, len);
+	iounmap(maddr);
+
+	return len;
+}
+#endif
+
 /*
  * Access another process' address space.
  * Source/target buffer must be kernel space,
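Note: generic_access_phys() is meant to be plugged into the new vm_operations_struct ->access hook (which access_process_vm() calls in the hunks below), so ptrace and friends can read and write VM_IO / VM_PFNMAP mappings that have no struct page behind them. A rough sketch of a driver wiring it up; the mydev_* names and base pfn are hypothetical, while generic_access_phys and io_remap_pfn_range are real interfaces:

	static unsigned long mydev_base_pfn;	/* hypothetical MMIO aperture */

	static struct vm_operations_struct mydev_vm_ops = {
	#ifdef CONFIG_HAVE_IOREMAP_PROT
		.access = generic_access_phys,	/* lets ptrace reach the MMIO */
	#endif
	};

	/* Sketch only: map device MMIO and allow ->access on the mapping. */
	static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
	{
		vma->vm_ops = &mydev_vm_ops;
		/* io_remap_pfn_range() marks the vma VM_IO | VM_PFNMAP,
		 * which follow_phys() above insists on. */
		return io_remap_pfn_range(vma, vma->vm_start, mydev_base_pfn,
					  vma->vm_end - vma->vm_start,
					  vma->vm_page_prot);
	}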
@@ -2813,7 +2920,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 {
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	struct page *page;
 	void *old_buf = buf;
 
 	mm = get_task_mm(tsk);
@@ -2825,28 +2931,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	while (len) {
 		int bytes, ret, offset;
 		void *maddr;
+		struct page *page = NULL;
 
 		ret = get_user_pages(tsk, mm, addr, 1,
 				write, 1, &page, &vma);
-		if (ret <= 0)
-			break;
-
-		bytes = len;
-		offset = addr & (PAGE_SIZE-1);
-		if (bytes > PAGE_SIZE-offset)
-			bytes = PAGE_SIZE-offset;
-
-		maddr = kmap(page);
-		if (write) {
-			copy_to_user_page(vma, page, addr,
-					  maddr + offset, buf, bytes);
-			set_page_dirty_lock(page);
+		if (ret <= 0) {
+			/*
+			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
+			 * we can access using slightly different code.
+			 */
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+			vma = find_vma(mm, addr);
+			if (!vma)
+				break;
+			if (vma->vm_ops && vma->vm_ops->access)
+				ret = vma->vm_ops->access(vma, addr, buf,
+							  len, write);
+			if (ret <= 0)
+#endif
+				break;
+			bytes = ret;
 		} else {
-			copy_from_user_page(vma, page, addr,
-					    buf, maddr + offset, bytes);
+			bytes = len;
+			offset = addr & (PAGE_SIZE-1);
+			if (bytes > PAGE_SIZE-offset)
+				bytes = PAGE_SIZE-offset;
+
+			maddr = kmap(page);
+			if (write) {
+				copy_to_user_page(vma, page, addr,
+						  maddr + offset, buf, bytes);
+				set_page_dirty_lock(page);
+			} else {
+				copy_from_user_page(vma, page, addr,
+						    buf, maddr + offset, bytes);
+			}
+			kunmap(page);
+			page_cache_release(page);
 		}
-		kunmap(page);
-		page_cache_release(page);
 		len -= bytes;
 		buf += bytes;
 		addr += bytes;