Diffstat (limited to 'mm/memory.c')
 -rw-r--r--  mm/memory.c | 322
 1 files changed, 222 insertions, 100 deletions
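The recurring pattern in this patch is that every path which batch-updates the primary page tables (copy_page_range(), unmap_vmas(), apply_to_page_range(), and the ptep_clear_flush_notify() change in do_wp_page()) now brackets the update with the new mmu notifier hooks, so secondary MMUs (e.g. KVM) can drop their mappings before the primary PTEs change. The sketch below is illustrative only and is not part of the patch: it reuses the mmu_notifier_invalidate_range_start()/_end() calls visible in the diff, while update_ptes() is a hypothetical stand-in for the real PTE-modifying loop.

#include <linux/mm.h>
#include <linux/mmu_notifier.h>

/* Illustrative sketch only -- not part of the patch that follows. */
static void modify_range(struct mm_struct *mm,
			 unsigned long start, unsigned long end)
{
	/* Ask secondary MMUs to invalidate [start, end) before touching it. */
	mmu_notifier_invalidate_range_start(mm, start, end);

	update_ptes(mm, start, end);	/* hypothetical PTE update loop */

	/* Secondary MMUs may fault their mappings back in from here on. */
	mmu_notifier_invalidate_range_end(mm, start, end);
}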
diff --git a/mm/memory.c b/mm/memory.c
index 2302d228fe04..1002f473f497 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -51,6 +51,7 @@
 #include <linux/init.h>
 #include <linux/writeback.h>
 #include <linux/memcontrol.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -61,6 +62,8 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
+#include "internal.h"
+
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
@@ -211,7 +214,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  *
  * Must be called with pagetable lock held.
  */
-void free_pgd_range(struct mmu_gather **tlb,
+void free_pgd_range(struct mmu_gather *tlb,
 		unsigned long addr, unsigned long end,
 		unsigned long floor, unsigned long ceiling)
 {
@@ -262,16 +265,16 @@ void free_pgd_range(struct mmu_gather **tlb,
 		return;
 
 	start = addr;
-	pgd = pgd_offset((*tlb)->mm, addr);
+	pgd = pgd_offset(tlb->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+		free_pud_range(tlb, pgd, addr, next, floor, ceiling);
 	} while (pgd++, addr = next, addr != end);
 }
 
-void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
+void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		unsigned long floor, unsigned long ceiling)
 {
 	while (vma) {
@@ -372,7 +375,8 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
  *
  * The calling function must still handle the error.
  */
-void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+static void print_bad_pte(struct vm_area_struct *vma, pte_t pte,
+			  unsigned long vaddr)
 {
 	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
 			"vm_flags = %lx, vaddr = %lx\n",
@@ -649,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	unsigned long next;
 	unsigned long addr = vma->vm_start;
 	unsigned long end = vma->vm_end;
+	int ret;
 
 	/*
 	 * Don't copy ptes where a page fault will fill them correctly.
@@ -664,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
+	/*
+	 * We need to invalidate the secondary MMU mappings only when
+	 * there could be a permission downgrade on the ptes of the
+	 * parent mm. And a permission downgrade will only happen if
+	 * is_cow_mapping() returns true.
+	 */
+	if (is_cow_mapping(vma->vm_flags))
+		mmu_notifier_invalidate_range_start(src_mm, addr, end);
+
+	ret = 0;
 	dst_pgd = pgd_offset(dst_mm, addr);
 	src_pgd = pgd_offset(src_mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
-		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
-						vma, addr, next))
-			return -ENOMEM;
+		if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
+					    vma, addr, next))) {
+			ret = -ENOMEM;
+			break;
+		}
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
-	return 0;
+
+	if (is_cow_mapping(vma->vm_flags))
+		mmu_notifier_invalidate_range_end(src_mm,
+						  vma->vm_start, end);
+	return ret;
 }
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
@@ -878,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 	unsigned long start = start_addr;
 	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
 	int fullmm = (*tlbp)->fullmm;
+	struct mm_struct *mm = vma->vm_mm;
 
+	mmu_notifier_invalidate_range_start(mm, start_addr, end_addr);
 	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
 		unsigned long end;
 
@@ -899,9 +922,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		}
 
 		if (unlikely(is_vm_hugetlb_page(vma))) {
-			unmap_hugepage_range(vma, start, end);
-			zap_work -= (end - start) /
-					(HPAGE_SIZE / PAGE_SIZE);
+			/*
+			 * It is undesirable to test vma->vm_file as it
+			 * should be non-null for valid hugetlb area.
+			 * However, vm_file will be NULL in the error
+			 * cleanup path of do_mmap_pgoff. When
+			 * hugetlbfs ->mmap method fails,
+			 * do_mmap_pgoff() nullifies vma->vm_file
+			 * before calling this function to clean up.
+			 * Since no pte has actually been setup, it is
+			 * safe to do nothing in this case.
+			 */
+			if (vma->vm_file) {
+				unmap_hugepage_range(vma, start, end, NULL);
+				zap_work -= (end - start) /
+				pages_per_huge_page(hstate_vma(vma));
+			}
+
 			start = end;
 		} else
 			start = unmap_page_range(*tlbp, vma,
@@ -929,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
 		}
 	}
 out:
+	mmu_notifier_invalidate_range_end(mm, start_addr, end_addr);
 	return start;	/* which is now the end (or restart) address */
 }
 
@@ -956,6 +994,29 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
 	return end;
 }
 
+/**
+ * zap_vma_ptes - remove ptes mapping the vma
+ * @vma: vm_area_struct holding ptes to be zapped
+ * @address: starting address of pages to zap
+ * @size: number of bytes to zap
+ *
+ * This function only unmaps ptes assigned to VM_PFNMAP vmas.
+ *
+ * The entire address range must be fully contained within the vma.
+ *
+ * Returns 0 if successful.
+ */
+int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
+		unsigned long size)
+{
+	if (address < vma->vm_start || address + size > vma->vm_end ||
+			!(vma->vm_flags & VM_PFNMAP))
+		return -1;
+	zap_page_range(vma, address, size, NULL);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(zap_vma_ptes);
+
 /*
  * Do a quick page-table lookup for a single page.
  */
@@ -982,19 +1043,24 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 		goto no_page_table;
 
 	pud = pud_offset(pgd, address);
-	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+	if (pud_none(*pud))
 		goto no_page_table;
-
+	if (pud_huge(*pud)) {
+		BUG_ON(flags & FOLL_GET);
+		page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
+		goto out;
+	}
+	if (unlikely(pud_bad(*pud)))
+		goto no_page_table;
+
 	pmd = pmd_offset(pud, address);
 	if (pmd_none(*pmd))
 		goto no_page_table;
-
 	if (pmd_huge(*pmd)) {
 		BUG_ON(flags & FOLL_GET);
 		page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
 		goto out;
 	}
-
 	if (unlikely(pmd_bad(*pmd)))
 		goto no_page_table;
 
@@ -1058,11 +1124,9 @@ static inline int use_zero_page(struct vm_area_struct *vma)
 	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
 		return 0;
 	/*
-	 * And if we have a fault or a nopfn routine, it's not an
-	 * anonymous region.
+	 * And if we have a fault routine, it's not an anonymous region.
 	 */
-	return !vma->vm_ops ||
-		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+	return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
@@ -1338,6 +1402,11 @@ out:
  *
  * This function should only be called from a vm_ops->fault handler, and
  * in that case the handler should return NULL.
+ *
+ * vma cannot be a COW mapping.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
  */
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn)
@@ -1548,6 +1617,8 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
 	unsigned long next;
 	int err;
 
+	BUG_ON(pud_huge(*pud));
+
 	pmd = pmd_alloc(mm, pud, addr);
 	if (!pmd)
 		return -ENOMEM;
@@ -1589,10 +1660,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 {
 	pgd_t *pgd;
 	unsigned long next;
-	unsigned long end = addr + size;
+	unsigned long start = addr, end = addr + size;
 	int err;
 
 	BUG_ON(addr >= end);
+	mmu_notifier_invalidate_range_start(mm, start, end);
 	pgd = pgd_offset(mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
@@ -1600,6 +1672,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
+	mmu_notifier_invalidate_range_end(mm, start, end);
 	return err;
 }
 EXPORT_SYMBOL_GPL(apply_to_page_range);
@@ -1716,7 +1789,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * not dirty accountable.
 	 */
 	if (PageAnon(old_page)) {
-		if (!TestSetPageLocked(old_page)) {
+		if (trylock_page(old_page)) {
 			reuse = can_share_swap_page(old_page);
 			unlock_page(old_page);
 		}
@@ -1812,7 +1885,7 @@ gotten:
 	 * seen in the presence of one thread doing SMC and another
 	 * thread doing COW.
 	 */
-	ptep_clear_flush(vma, address, page_table);
+	ptep_clear_flush_notify(vma, address, page_table);
 	set_pte_at(mm, address, page_table, entry);
 	update_mmu_cache(vma, address, entry);
 	lru_cache_add_active(new_page);
@@ -2501,59 +2574,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte);
 }
 
-
-/*
- * do_no_pfn() tries to create a new page mapping for a page without
- * a struct_page backing it
- *
- * As this is called only for pages that do not currently exist, we
- * do not need to flush old virtual caches or the TLB.
- *
- * We enter with non-exclusive mmap_sem (to exclude vma changes,
- * but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
- *
- * It is expected that the ->nopfn handler always returns the same pfn
- * for a given virtual mapping.
- *
- * Mark this `noinline' to prevent it from bloating the main pagefault code.
- */
-static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma,
-		unsigned long address, pte_t *page_table, pmd_t *pmd,
-		int write_access)
-{
-	spinlock_t *ptl;
-	pte_t entry;
-	unsigned long pfn;
-
-	pte_unmap(page_table);
-	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
-	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
-
-	pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK);
-
-	BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
-
-	if (unlikely(pfn == NOPFN_OOM))
-		return VM_FAULT_OOM;
-	else if (unlikely(pfn == NOPFN_SIGBUS))
-		return VM_FAULT_SIGBUS;
-	else if (unlikely(pfn == NOPFN_REFAULT))
-		return 0;
-
-	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-	/* Only go through if we didn't race with anybody else... */
-	if (pte_none(*page_table)) {
-		entry = pfn_pte(pfn, vma->vm_page_prot);
-		if (write_access)
-			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-		set_pte_at(mm, address, page_table, entry);
-	}
-	pte_unmap_unlock(page_table, ptl);
-	return 0;
-}
-
 /*
  * Fault of a previously existing named mapping. Repopulate the pte
  * from the encoded file_pte if possible. This enables swappable
@@ -2614,9 +2634,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 		if (likely(vma->vm_ops->fault))
 			return do_linear_fault(mm, vma, address,
 				pte, pmd, write_access, entry);
-		if (unlikely(vma->vm_ops->nopfn))
-			return do_no_pfn(mm, vma, address, pte,
-					pmd, write_access);
 	}
 	return do_anonymous_page(mm, vma, address,
 				pte, pmd, write_access);
@@ -2748,16 +2765,26 @@ int make_pages_present(unsigned long addr, unsigned long end)
 
 	vma = find_vma(current->mm, addr);
 	if (!vma)
-		return -1;
+		return -ENOMEM;
 	write = (vma->vm_flags & VM_WRITE) != 0;
 	BUG_ON(addr >= end);
 	BUG_ON(end > vma->vm_end);
 	len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
 	ret = get_user_pages(current, current->mm, addr,
 			len, write, 0, NULL, NULL);
-	if (ret < 0)
+	if (ret < 0) {
+		/*
+		   SUS require strange return value to mlock
+		    - invalid addr generate to ENOMEM.
+		    - out of memory should generate EAGAIN.
+		*/
+		if (ret == -EFAULT)
+			ret = -ENOMEM;
+		else if (ret == -ENOMEM)
+			ret = -EAGAIN;
 		return ret;
-	return ret == len ? 0 : -1;
+	}
+	return ret == len ? 0 : -ENOMEM;
 }
 
 #if !defined(__HAVE_ARCH_GATE_AREA)
@@ -2804,6 +2831,86 @@ int in_gate_area_no_task(unsigned long addr)
 
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+static resource_size_t follow_phys(struct vm_area_struct *vma,
+			unsigned long address, unsigned int flags,
+			unsigned long *prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+	spinlock_t *ptl;
+	resource_size_t phys_addr = 0;
+	struct mm_struct *mm = vma->vm_mm;
+
+	VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+
+	pgd = pgd_offset(mm, address);
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+		goto no_page_table;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none(*pud) || unlikely(pud_bad(*pud)))
+		goto no_page_table;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+		goto no_page_table;
+
+	/* We cannot handle huge page PFN maps. Luckily they don't exist. */
+	if (pmd_huge(*pmd))
+		goto no_page_table;
+
+	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+	if (!ptep)
+		goto out;
+
+	pte = *ptep;
+	if (!pte_present(pte))
+		goto unlock;
+	if ((flags & FOLL_WRITE) && !pte_write(pte))
+		goto unlock;
+	phys_addr = pte_pfn(pte);
+	phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
+
+	*prot = pgprot_val(pte_pgprot(pte));
+
+unlock:
+	pte_unmap_unlock(ptep, ptl);
+out:
+	return phys_addr;
+no_page_table:
+	return 0;
+}
+
+int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
+			void *buf, int len, int write)
+{
+	resource_size_t phys_addr;
+	unsigned long prot = 0;
+	void *maddr;
+	int offset = addr & (PAGE_SIZE-1);
+
+	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+		return -EINVAL;
+
+	phys_addr = follow_phys(vma, addr, write, &prot);
+
+	if (!phys_addr)
+		return -EINVAL;
+
+	maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
+	if (write)
+		memcpy_toio(maddr + offset, buf, len);
+	else
+		memcpy_fromio(buf, maddr + offset, len);
+	iounmap(maddr);
+
+	return len;
+}
+#endif
+
 /*
  * Access another process' address space.
  * Source/target buffer must be kernel space,
@@ -2813,7 +2920,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 {
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	struct page *page;
 	void *old_buf = buf;
 
 	mm = get_task_mm(tsk);
@@ -2825,28 +2931,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 	while (len) {
 		int bytes, ret, offset;
 		void *maddr;
+		struct page *page = NULL;
 
 		ret = get_user_pages(tsk, mm, addr, 1,
 				write, 1, &page, &vma);
-		if (ret <= 0)
-			break;
-
-		bytes = len;
-		offset = addr & (PAGE_SIZE-1);
-		if (bytes > PAGE_SIZE-offset)
-			bytes = PAGE_SIZE-offset;
-
-		maddr = kmap(page);
-		if (write) {
-			copy_to_user_page(vma, page, addr,
-					maddr + offset, buf, bytes);
-			set_page_dirty_lock(page);
+		if (ret <= 0) {
+			/*
+			 * Check if this is a VM_IO | VM_PFNMAP VMA, which
+			 * we can access using slightly different code.
+			 */
+#ifdef CONFIG_HAVE_IOREMAP_PROT
+			vma = find_vma(mm, addr);
+			if (!vma)
+				break;
+			if (vma->vm_ops && vma->vm_ops->access)
+				ret = vma->vm_ops->access(vma, addr, buf,
+							  len, write);
+			if (ret <= 0)
+#endif
+				break;
+			bytes = ret;
 		} else {
-			copy_from_user_page(vma, page, addr,
-					buf, maddr + offset, bytes);
+			bytes = len;
+			offset = addr & (PAGE_SIZE-1);
+			if (bytes > PAGE_SIZE-offset)
+				bytes = PAGE_SIZE-offset;
+
+			maddr = kmap(page);
+			if (write) {
+				copy_to_user_page(vma, page, addr,
+						  maddr + offset, buf, bytes);
+				set_page_dirty_lock(page);
+			} else {
+				copy_from_user_page(vma, page, addr,
+						    buf, maddr + offset, bytes);
+			}
+			kunmap(page);
+			page_cache_release(page);
 		}
-		kunmap(page);
-		page_cache_release(page);
 		len -= bytes;
 		buf += bytes;
 		addr += bytes;