| author | Ingo Molnar <mingo@elte.hu> | 2008-08-14 06:19:59 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-08-14 06:19:59 -0400 |
| commit | 8d7ccaa545490cdffdfaff0842436a8dd85cf47b (patch) | |
| tree | 8129b5907161bc6ae26deb3645ce1e280c5e1f51 /mm/memory.c | |
| parent | b2139aa0eec330c711c5a279db361e5ef1178e78 (diff) | |
| parent | 30a2f3c60a84092c8084dfe788b710f8d0768cd4 (diff) | |
Merge commit 'v2.6.27-rc3' into x86/prototypes
Conflicts:
include/asm-x86/dma-mapping.h
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/memory.c')
| -rw-r--r-- | mm/memory.c | 322 |
1 file changed, 222 insertions(+), 100 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 2302d228fe04..1002f473f497 100644
--- a/mm/memory.c
+++ b/mm/memory.c
| @@ -51,6 +51,7 @@ | |||
| 51 | #include <linux/init.h> | 51 | #include <linux/init.h> |
| 52 | #include <linux/writeback.h> | 52 | #include <linux/writeback.h> |
| 53 | #include <linux/memcontrol.h> | 53 | #include <linux/memcontrol.h> |
| 54 | #include <linux/mmu_notifier.h> | ||
| 54 | 55 | ||
| 55 | #include <asm/pgalloc.h> | 56 | #include <asm/pgalloc.h> |
| 56 | #include <asm/uaccess.h> | 57 | #include <asm/uaccess.h> |
| @@ -61,6 +62,8 @@ | |||
| 61 | #include <linux/swapops.h> | 62 | #include <linux/swapops.h> |
| 62 | #include <linux/elf.h> | 63 | #include <linux/elf.h> |
| 63 | 64 | ||
| 65 | #include "internal.h" | ||
| 66 | |||
| 64 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 67 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
| 65 | /* use the per-pgdat data instead for discontigmem - mbligh */ | 68 | /* use the per-pgdat data instead for discontigmem - mbligh */ |
| 66 | unsigned long max_mapnr; | 69 | unsigned long max_mapnr; |
| @@ -211,7 +214,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |||
| 211 | * | 214 | * |
| 212 | * Must be called with pagetable lock held. | 215 | * Must be called with pagetable lock held. |
| 213 | */ | 216 | */ |
| 214 | void free_pgd_range(struct mmu_gather **tlb, | 217 | void free_pgd_range(struct mmu_gather *tlb, |
| 215 | unsigned long addr, unsigned long end, | 218 | unsigned long addr, unsigned long end, |
| 216 | unsigned long floor, unsigned long ceiling) | 219 | unsigned long floor, unsigned long ceiling) |
| 217 | { | 220 | { |
| @@ -262,16 +265,16 @@ void free_pgd_range(struct mmu_gather **tlb, | |||
| 262 | return; | 265 | return; |
| 263 | 266 | ||
| 264 | start = addr; | 267 | start = addr; |
| 265 | pgd = pgd_offset((*tlb)->mm, addr); | 268 | pgd = pgd_offset(tlb->mm, addr); |
| 266 | do { | 269 | do { |
| 267 | next = pgd_addr_end(addr, end); | 270 | next = pgd_addr_end(addr, end); |
| 268 | if (pgd_none_or_clear_bad(pgd)) | 271 | if (pgd_none_or_clear_bad(pgd)) |
| 269 | continue; | 272 | continue; |
| 270 | free_pud_range(*tlb, pgd, addr, next, floor, ceiling); | 273 | free_pud_range(tlb, pgd, addr, next, floor, ceiling); |
| 271 | } while (pgd++, addr = next, addr != end); | 274 | } while (pgd++, addr = next, addr != end); |
| 272 | } | 275 | } |
| 273 | 276 | ||
| 274 | void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma, | 277 | void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, |
| 275 | unsigned long floor, unsigned long ceiling) | 278 | unsigned long floor, unsigned long ceiling) |
| 276 | { | 279 | { |
| 277 | while (vma) { | 280 | while (vma) { |
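
The two hunks above drop one level of indirection: `free_pgd_range()` and `free_pgtables()` now take the `struct mmu_gather *` itself rather than a pointer to the caller's pointer. A minimal caller-side sketch of what that means, modelled loosely on an `exit_mmap()`-style teardown path (the call sites themselves are not part of this diff, and `my_teardown()` is a made-up name):

```c
#include <linux/mm.h>
#include <asm/tlb.h>

static void my_teardown(struct mm_struct *mm, struct vm_area_struct *vma)
{
        struct mmu_gather *tlb = tlb_gather_mmu(mm, 1);        /* full-mm flush */

        /* before this patch the pointer was passed by address:
         *      free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
         */
        free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);

        tlb_finish_mmu(tlb, 0, TASK_SIZE);
}
```
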
| @@ -372,7 +375,8 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) | |||
| 372 | * | 375 | * |
| 373 | * The calling function must still handle the error. | 376 | * The calling function must still handle the error. |
| 374 | */ | 377 | */ |
| 375 | void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr) | 378 | static void print_bad_pte(struct vm_area_struct *vma, pte_t pte, |
| 379 | unsigned long vaddr) | ||
| 376 | { | 380 | { |
| 377 | printk(KERN_ERR "Bad pte = %08llx, process = %s, " | 381 | printk(KERN_ERR "Bad pte = %08llx, process = %s, " |
| 378 | "vm_flags = %lx, vaddr = %lx\n", | 382 | "vm_flags = %lx, vaddr = %lx\n", |
| @@ -649,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
| 649 | unsigned long next; | 653 | unsigned long next; |
| 650 | unsigned long addr = vma->vm_start; | 654 | unsigned long addr = vma->vm_start; |
| 651 | unsigned long end = vma->vm_end; | 655 | unsigned long end = vma->vm_end; |
| 656 | int ret; | ||
| 652 | 657 | ||
| 653 | /* | 658 | /* |
| 654 | * Don't copy ptes where a page fault will fill them correctly. | 659 | * Don't copy ptes where a page fault will fill them correctly. |
| @@ -664,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
| 664 | if (is_vm_hugetlb_page(vma)) | 669 | if (is_vm_hugetlb_page(vma)) |
| 665 | return copy_hugetlb_page_range(dst_mm, src_mm, vma); | 670 | return copy_hugetlb_page_range(dst_mm, src_mm, vma); |
| 666 | 671 | ||
| 672 | /* | ||
| 673 | * We need to invalidate the secondary MMU mappings only when | ||
| 674 | * there could be a permission downgrade on the ptes of the | ||
| 675 | * parent mm. And a permission downgrade will only happen if | ||
| 676 | * is_cow_mapping() returns true. | ||
| 677 | */ | ||
| 678 | if (is_cow_mapping(vma->vm_flags)) | ||
| 679 | mmu_notifier_invalidate_range_start(src_mm, addr, end); | ||
| 680 | |||
| 681 | ret = 0; | ||
| 667 | dst_pgd = pgd_offset(dst_mm, addr); | 682 | dst_pgd = pgd_offset(dst_mm, addr); |
| 668 | src_pgd = pgd_offset(src_mm, addr); | 683 | src_pgd = pgd_offset(src_mm, addr); |
| 669 | do { | 684 | do { |
| 670 | next = pgd_addr_end(addr, end); | 685 | next = pgd_addr_end(addr, end); |
| 671 | if (pgd_none_or_clear_bad(src_pgd)) | 686 | if (pgd_none_or_clear_bad(src_pgd)) |
| 672 | continue; | 687 | continue; |
| 673 | if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, | 688 | if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, |
| 674 | vma, addr, next)) | 689 | vma, addr, next))) { |
| 675 | return -ENOMEM; | 690 | ret = -ENOMEM; |
| 691 | break; | ||
| 692 | } | ||
| 676 | } while (dst_pgd++, src_pgd++, addr = next, addr != end); | 693 | } while (dst_pgd++, src_pgd++, addr = next, addr != end); |
| 677 | return 0; | 694 | |
| 695 | if (is_cow_mapping(vma->vm_flags)) | ||
| 696 | mmu_notifier_invalidate_range_end(src_mm, | ||
| 697 | vma->vm_start, end); | ||
| 698 | return ret; | ||
| 678 | } | 699 | } |
| 679 | 700 | ||
| 680 | static unsigned long zap_pte_range(struct mmu_gather *tlb, | 701 | static unsigned long zap_pte_range(struct mmu_gather *tlb, |
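
The comment added in this hunk explains when secondary MMUs (KVM shadow page tables, GRU and the like) must be told that host ptes may lose permissions. For context, a minimal consumer-side sketch of the 2.6.27 mmu notifier API whose hooks `copy_page_range()` now calls; all `my_*` names are hypothetical and error handling is elided:

```c
#include <linux/mmu_notifier.h>
#include <linux/mm.h>

static void my_invalidate_range_start(struct mmu_notifier *mn,
                                      struct mm_struct *mm,
                                      unsigned long start, unsigned long end)
{
        /* tear down / write-protect secondary mappings for [start, end)
         * before the primary ptes are changed */
}

static void my_invalidate_range_end(struct mmu_notifier *mn,
                                    struct mm_struct *mm,
                                    unsigned long start, unsigned long end)
{
        /* primary ptes are stable again; secondary mappings may be refaulted */
}

static const struct mmu_notifier_ops my_mn_ops = {
        .invalidate_range_start = my_invalidate_range_start,
        .invalidate_range_end   = my_invalidate_range_end,
};

static struct mmu_notifier my_mn = { .ops = &my_mn_ops };

static int my_attach_to_mm(struct mm_struct *mm)
{
        /* makes the callbacks above fire for hooks like the ones added here */
        return mmu_notifier_register(&my_mn, mm);
}
```
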
| @@ -878,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | |||
| 878 | unsigned long start = start_addr; | 899 | unsigned long start = start_addr; |
| 879 | spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; | 900 | spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; |
| 880 | int fullmm = (*tlbp)->fullmm; | 901 | int fullmm = (*tlbp)->fullmm; |
| 902 | struct mm_struct *mm = vma->vm_mm; | ||
| 881 | 903 | ||
| 904 | mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); | ||
| 882 | for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { | 905 | for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { |
| 883 | unsigned long end; | 906 | unsigned long end; |
| 884 | 907 | ||
| @@ -899,9 +922,23 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | |||
| 899 | } | 922 | } |
| 900 | 923 | ||
| 901 | if (unlikely(is_vm_hugetlb_page(vma))) { | 924 | if (unlikely(is_vm_hugetlb_page(vma))) { |
| 902 | unmap_hugepage_range(vma, start, end); | 925 | /* |
| 903 | zap_work -= (end - start) / | 926 | * It is undesirable to test vma->vm_file as it |
| 904 | (HPAGE_SIZE / PAGE_SIZE); | 927 | * should be non-null for valid hugetlb area. |
| 928 | * However, vm_file will be NULL in the error | ||
| 929 | * cleanup path of do_mmap_pgoff. When | ||
| 930 | * hugetlbfs ->mmap method fails, | ||
| 931 | * do_mmap_pgoff() nullifies vma->vm_file | ||
| 932 | * before calling this function to clean up. | ||
| 933 | * Since no pte has actually been setup, it is | ||
| 934 | * safe to do nothing in this case. | ||
| 935 | */ | ||
| 936 | if (vma->vm_file) { | ||
| 937 | unmap_hugepage_range(vma, start, end, NULL); | ||
| 938 | zap_work -= (end - start) / | ||
| 939 | pages_per_huge_page(hstate_vma(vma)); | ||
| 940 | } | ||
| 941 | |||
| 905 | start = end; | 942 | start = end; |
| 906 | } else | 943 | } else |
| 907 | start = unmap_page_range(*tlbp, vma, | 944 | start = unmap_page_range(*tlbp, vma, |
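
The hugetlb branch above also switches from the fixed `HPAGE_SIZE` to the per-vma `hstate` introduced for multiple huge page sizes. A tiny illustrative helper (the name `my_huge_pages_in_range()` is made up) showing the accessors it now relies on:

```c
#include <linux/hugetlb.h>

static unsigned long my_huge_pages_in_range(struct vm_area_struct *vma,
                                            unsigned long start,
                                            unsigned long end)
{
        struct hstate *h = hstate_vma(vma);     /* huge page size backing this vma */

        return (end - start) / pages_per_huge_page(h);
}
```
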
| @@ -929,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, | |||
| 929 | } | 966 | } |
| 930 | } | 967 | } |
| 931 | out: | 968 | out: |
| 969 | mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); | ||
| 932 | return start; /* which is now the end (or restart) address */ | 970 | return start; /* which is now the end (or restart) address */ |
| 933 | } | 971 | } |
| 934 | 972 | ||
| @@ -956,6 +994,29 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, | |||
| 956 | return end; | 994 | return end; |
| 957 | } | 995 | } |
| 958 | 996 | ||
| 997 | /** | ||
| 998 | * zap_vma_ptes - remove ptes mapping the vma | ||
| 999 | * @vma: vm_area_struct holding ptes to be zapped | ||
| 1000 | * @address: starting address of pages to zap | ||
| 1001 | * @size: number of bytes to zap | ||
| 1002 | * | ||
| 1003 | * This function only unmaps ptes assigned to VM_PFNMAP vmas. | ||
| 1004 | * | ||
| 1005 | * The entire address range must be fully contained within the vma. | ||
| 1006 | * | ||
| 1007 | * Returns 0 if successful. | ||
| 1008 | */ | ||
| 1009 | int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, | ||
| 1010 | unsigned long size) | ||
| 1011 | { | ||
| 1012 | if (address < vma->vm_start || address + size > vma->vm_end || | ||
| 1013 | !(vma->vm_flags & VM_PFNMAP)) | ||
| 1014 | return -1; | ||
| 1015 | zap_page_range(vma, address, size, NULL); | ||
| 1016 | return 0; | ||
| 1017 | } | ||
| 1018 | EXPORT_SYMBOL_GPL(zap_vma_ptes); | ||
| 1019 | |||
| 959 | /* | 1020 | /* |
| 960 | * Do a quick page-table lookup for a single page. | 1021 | * Do a quick page-table lookup for a single page. |
| 961 | */ | 1022 | */ |
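
`zap_vma_ptes()` is a new GPL export aimed at drivers that install raw PFN mappings (`VM_PFNMAP`, e.g. via `remap_pfn_range()`) and later need to revoke them. A hedged usage sketch; the surrounding driver and `my_revoke_mapping()` are hypothetical:

```c
#include <linux/mm.h>
#include <linux/kernel.h>

/* tear down every pte the driver previously installed in this vma,
 * e.g. because the backing device memory is going away */
static void my_revoke_mapping(struct vm_area_struct *vma)
{
        if (zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start))
                printk(KERN_WARNING "my_drv: vma is not VM_PFNMAP, nothing zapped\n");
}
```
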
| @@ -982,19 +1043,24 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, | |||
| 982 | goto no_page_table; | 1043 | goto no_page_table; |
| 983 | 1044 | ||
| 984 | pud = pud_offset(pgd, address); | 1045 | pud = pud_offset(pgd, address); |
| 985 | if (pud_none(*pud) || unlikely(pud_bad(*pud))) | 1046 | if (pud_none(*pud)) |
| 986 | goto no_page_table; | 1047 | goto no_page_table; |
| 987 | 1048 | if (pud_huge(*pud)) { | |
| 1049 | BUG_ON(flags & FOLL_GET); | ||
| 1050 | page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE); | ||
| 1051 | goto out; | ||
| 1052 | } | ||
| 1053 | if (unlikely(pud_bad(*pud))) | ||
| 1054 | goto no_page_table; | ||
| 1055 | |||
| 988 | pmd = pmd_offset(pud, address); | 1056 | pmd = pmd_offset(pud, address); |
| 989 | if (pmd_none(*pmd)) | 1057 | if (pmd_none(*pmd)) |
| 990 | goto no_page_table; | 1058 | goto no_page_table; |
| 991 | |||
| 992 | if (pmd_huge(*pmd)) { | 1059 | if (pmd_huge(*pmd)) { |
| 993 | BUG_ON(flags & FOLL_GET); | 1060 | BUG_ON(flags & FOLL_GET); |
| 994 | page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); | 1061 | page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); |
| 995 | goto out; | 1062 | goto out; |
| 996 | } | 1063 | } |
| 997 | |||
| 998 | if (unlikely(pmd_bad(*pmd))) | 1064 | if (unlikely(pmd_bad(*pmd))) |
| 999 | goto no_page_table; | 1065 | goto no_page_table; |
| 1000 | 1066 | ||
| @@ -1058,11 +1124,9 @@ static inline int use_zero_page(struct vm_area_struct *vma) | |||
| 1058 | if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) | 1124 | if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) |
| 1059 | return 0; | 1125 | return 0; |
| 1060 | /* | 1126 | /* |
| 1061 | * And if we have a fault or a nopfn routine, it's not an | 1127 | * And if we have a fault routine, it's not an anonymous region. |
| 1062 | * anonymous region. | ||
| 1063 | */ | 1128 | */ |
| 1064 | return !vma->vm_ops || | 1129 | return !vma->vm_ops || !vma->vm_ops->fault; |
| 1065 | (!vma->vm_ops->fault && !vma->vm_ops->nopfn); | ||
| 1066 | } | 1130 | } |
| 1067 | 1131 | ||
| 1068 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, | 1132 | int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, |
| @@ -1338,6 +1402,11 @@ out: | |||
| 1338 | * | 1402 | * |
| 1339 | * This function should only be called from a vm_ops->fault handler, and | 1403 | * This function should only be called from a vm_ops->fault handler, and |
| 1340 | * in that case the handler should return NULL. | 1404 | * in that case the handler should return NULL. |
| 1405 | * | ||
| 1406 | * vma cannot be a COW mapping. | ||
| 1407 | * | ||
| 1408 | * As this is called only for pages that do not currently exist, we | ||
| 1409 | * do not need to flush old virtual caches or the TLB. | ||
| 1341 | */ | 1410 | */ |
| 1342 | int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, | 1411 | int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, |
| 1343 | unsigned long pfn) | 1412 | unsigned long pfn) |
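
The expanded comment spells out the contract for `vm_insert_pfn()`: call it from a `->fault` handler on a non-COW mapping, with no cache or TLB flushing needed because the pte did not previously exist. A minimal sketch of that pattern (it is also the replacement for the `->nopfn` handlers removed further down in this diff); `my_base_pfn` and the other `my_*` names are hypothetical:

```c
#include <linux/mm.h>

static unsigned long my_base_pfn;       /* hypothetical: set at probe time */

static int my_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        unsigned long pfn = my_base_pfn + vmf->pgoff;
        int err;

        err = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
        if (err == -ENOMEM)
                return VM_FAULT_OOM;
        if (err == -EBUSY)
                return VM_FAULT_NOPAGE; /* raced: another thread installed the pte */
        if (err)
                return VM_FAULT_SIGBUS;
        return VM_FAULT_NOPAGE;         /* pte installed, no struct page returned */
}

static struct vm_operations_struct my_vm_ops = {
        .fault = my_vm_fault,
};
```
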
| @@ -1548,6 +1617,8 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, | |||
| 1548 | unsigned long next; | 1617 | unsigned long next; |
| 1549 | int err; | 1618 | int err; |
| 1550 | 1619 | ||
| 1620 | BUG_ON(pud_huge(*pud)); | ||
| 1621 | |||
| 1551 | pmd = pmd_alloc(mm, pud, addr); | 1622 | pmd = pmd_alloc(mm, pud, addr); |
| 1552 | if (!pmd) | 1623 | if (!pmd) |
| 1553 | return -ENOMEM; | 1624 | return -ENOMEM; |
| @@ -1589,10 +1660,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, | |||
| 1589 | { | 1660 | { |
| 1590 | pgd_t *pgd; | 1661 | pgd_t *pgd; |
| 1591 | unsigned long next; | 1662 | unsigned long next; |
| 1592 | unsigned long end = addr + size; | 1663 | unsigned long start = addr, end = addr + size; |
| 1593 | int err; | 1664 | int err; |
| 1594 | 1665 | ||
| 1595 | BUG_ON(addr >= end); | 1666 | BUG_ON(addr >= end); |
| 1667 | mmu_notifier_invalidate_range_start(mm, start, end); | ||
| 1596 | pgd = pgd_offset(mm, addr); | 1668 | pgd = pgd_offset(mm, addr); |
| 1597 | do { | 1669 | do { |
| 1598 | next = pgd_addr_end(addr, end); | 1670 | next = pgd_addr_end(addr, end); |
| @@ -1600,6 +1672,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, | |||
| 1600 | if (err) | 1672 | if (err) |
| 1601 | break; | 1673 | break; |
| 1602 | } while (pgd++, addr = next, addr != end); | 1674 | } while (pgd++, addr = next, addr != end); |
| 1675 | mmu_notifier_invalidate_range_end(mm, start, end); | ||
| 1603 | return err; | 1676 | return err; |
| 1604 | } | 1677 | } |
| 1605 | EXPORT_SYMBOL_GPL(apply_to_page_range); | 1678 | EXPORT_SYMBOL_GPL(apply_to_page_range); |
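
`apply_to_page_range()` walks (allocating page tables as needed) a range and invokes a callback on every pte slot; the hunks above bracket that walk with mmu notifier calls, since the callback may rewrite ptes. A hedged sketch of a caller, using the 2.6.27-era `pte_fn_t` callback signature as I understand it (`my_*` names are made up):

```c
#include <linux/mm.h>

/* called once per pte slot in the range; return non-zero to abort the walk */
static int my_pte_hook(pte_t *pte, pgtable_t token, unsigned long addr, void *data)
{
        /* inspect or rewrite *pte here */
        return 0;
}

static int my_walk(struct mm_struct *mm, unsigned long addr, unsigned long size)
{
        return apply_to_page_range(mm, addr, size, my_pte_hook, NULL);
}
```
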
| @@ -1716,7 +1789,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 1716 | * not dirty accountable. | 1789 | * not dirty accountable. |
| 1717 | */ | 1790 | */ |
| 1718 | if (PageAnon(old_page)) { | 1791 | if (PageAnon(old_page)) { |
| 1719 | if (!TestSetPageLocked(old_page)) { | 1792 | if (trylock_page(old_page)) { |
| 1720 | reuse = can_share_swap_page(old_page); | 1793 | reuse = can_share_swap_page(old_page); |
| 1721 | unlock_page(old_page); | 1794 | unlock_page(old_page); |
| 1722 | } | 1795 | } |
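
`trylock_page()` replaces the open-coded `!TestSetPageLocked()` test; note the sense of the return value is inverted (non-zero now means the lock was taken). A generic sketch of the idiom, not specific to `do_wp_page()`:

```c
#include <linux/pagemap.h>

static int my_do_if_lockable(struct page *page)
{
        if (!trylock_page(page))
                return 0;               /* somebody else holds PG_locked; back off */

        /* ... work that requires the page lock ... */

        unlock_page(page);
        return 1;
}
```
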
| @@ -1812,7 +1885,7 @@ gotten: | |||
| 1812 | * seen in the presence of one thread doing SMC and another | 1885 | * seen in the presence of one thread doing SMC and another |
| 1813 | * thread doing COW. | 1886 | * thread doing COW. |
| 1814 | */ | 1887 | */ |
| 1815 | ptep_clear_flush(vma, address, page_table); | 1888 | ptep_clear_flush_notify(vma, address, page_table); |
| 1816 | set_pte_at(mm, address, page_table, entry); | 1889 | set_pte_at(mm, address, page_table, entry); |
| 1817 | update_mmu_cache(vma, address, entry); | 1890 | update_mmu_cache(vma, address, entry); |
| 1818 | lru_cache_add_active(new_page); | 1891 | lru_cache_add_active(new_page); |
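
`ptep_clear_flush_notify()` is the mmu-notifier-aware variant used when the COW break replaces an existing pte. Roughly (paraphrasing the helper rather than quoting it), it behaves like this sketch:

```c
#include <linux/mmu_notifier.h>

/* clear and flush the pte as before, then tell secondary MMUs the old
 * translation for this address is gone (approximate equivalent of the
 * real ptep_clear_flush_notify(); shown for illustration only) */
static inline pte_t my_ptep_clear_flush_notify(struct vm_area_struct *vma,
                                               unsigned long address,
                                               pte_t *ptep)
{
        pte_t pte = ptep_clear_flush(vma, address, ptep);

        mmu_notifier_invalidate_page(vma->vm_mm, address);
        return pte;
}
```
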
| @@ -2501,59 +2574,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
| 2501 | return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); | 2574 | return __do_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); |
| 2502 | } | 2575 | } |
| 2503 | 2576 | ||
| 2504 | |||
| 2505 | /* | ||
| 2506 | * do_no_pfn() tries to create a new page mapping for a page without | ||
| 2507 | * a struct_page backing it | ||
| 2508 | * | ||
| 2509 | * As this is called only for pages that do not currently exist, we | ||
| 2510 | * do not need to flush old virtual caches or the TLB. | ||
| 2511 | * | ||
| 2512 | * We enter with non-exclusive mmap_sem (to exclude vma changes, | ||
| 2513 | * but allow concurrent faults), and pte mapped but not yet locked. | ||
| 2514 | * We return with mmap_sem still held, but pte unmapped and unlocked. | ||
| 2515 | * | ||
| 2516 | * It is expected that the ->nopfn handler always returns the same pfn | ||
| 2517 | * for a given virtual mapping. | ||
| 2518 | * | ||
| 2519 | * Mark this `noinline' to prevent it from bloating the main pagefault code. | ||
| 2520 | */ | ||
| 2521 | static noinline int do_no_pfn(struct mm_struct *mm, struct vm_area_struct *vma, | ||
| 2522 | unsigned long address, pte_t *page_table, pmd_t *pmd, | ||
| 2523 | int write_access) | ||
| 2524 | { | ||
| 2525 | spinlock_t *ptl; | ||
| 2526 | pte_t entry; | ||
| 2527 | unsigned long pfn; | ||
| 2528 | |||
| 2529 | pte_unmap(page_table); | ||
| 2530 | BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); | ||
| 2531 | BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); | ||
| 2532 | |||
| 2533 | pfn = vma->vm_ops->nopfn(vma, address & PAGE_MASK); | ||
| 2534 | |||
| 2535 | BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); | ||
| 2536 | |||
| 2537 | if (unlikely(pfn == NOPFN_OOM)) | ||
| 2538 | return VM_FAULT_OOM; | ||
| 2539 | else if (unlikely(pfn == NOPFN_SIGBUS)) | ||
| 2540 | return VM_FAULT_SIGBUS; | ||
| 2541 | else if (unlikely(pfn == NOPFN_REFAULT)) | ||
| 2542 | return 0; | ||
| 2543 | |||
| 2544 | page_table = pte_offset_map_lock(mm, pmd, address, &ptl); | ||
| 2545 | |||
| 2546 | /* Only go through if we didn't race with anybody else... */ | ||
| 2547 | if (pte_none(*page_table)) { | ||
| 2548 | entry = pfn_pte(pfn, vma->vm_page_prot); | ||
| 2549 | if (write_access) | ||
| 2550 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | ||
| 2551 | set_pte_at(mm, address, page_table, entry); | ||
| 2552 | } | ||
| 2553 | pte_unmap_unlock(page_table, ptl); | ||
| 2554 | return 0; | ||
| 2555 | } | ||
| 2556 | |||
| 2557 | /* | 2577 | /* |
| 2558 | * Fault of a previously existing named mapping. Repopulate the pte | 2578 | * Fault of a previously existing named mapping. Repopulate the pte |
| 2559 | * from the encoded file_pte if possible. This enables swappable | 2579 | * from the encoded file_pte if possible. This enables swappable |
| @@ -2614,9 +2634,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, | |||
| 2614 | if (likely(vma->vm_ops->fault)) | 2634 | if (likely(vma->vm_ops->fault)) |
| 2615 | return do_linear_fault(mm, vma, address, | 2635 | return do_linear_fault(mm, vma, address, |
| 2616 | pte, pmd, write_access, entry); | 2636 | pte, pmd, write_access, entry); |
| 2617 | if (unlikely(vma->vm_ops->nopfn)) | ||
| 2618 | return do_no_pfn(mm, vma, address, pte, | ||
| 2619 | pmd, write_access); | ||
| 2620 | } | 2637 | } |
| 2621 | return do_anonymous_page(mm, vma, address, | 2638 | return do_anonymous_page(mm, vma, address, |
| 2622 | pte, pmd, write_access); | 2639 | pte, pmd, write_access); |
| @@ -2748,16 +2765,26 @@ int make_pages_present(unsigned long addr, unsigned long end) | |||
| 2748 | 2765 | ||
| 2749 | vma = find_vma(current->mm, addr); | 2766 | vma = find_vma(current->mm, addr); |
| 2750 | if (!vma) | 2767 | if (!vma) |
| 2751 | return -1; | 2768 | return -ENOMEM; |
| 2752 | write = (vma->vm_flags & VM_WRITE) != 0; | 2769 | write = (vma->vm_flags & VM_WRITE) != 0; |
| 2753 | BUG_ON(addr >= end); | 2770 | BUG_ON(addr >= end); |
| 2754 | BUG_ON(end > vma->vm_end); | 2771 | BUG_ON(end > vma->vm_end); |
| 2755 | len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE; | 2772 | len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE; |
| 2756 | ret = get_user_pages(current, current->mm, addr, | 2773 | ret = get_user_pages(current, current->mm, addr, |
| 2757 | len, write, 0, NULL, NULL); | 2774 | len, write, 0, NULL, NULL); |
| 2758 | if (ret < 0) | 2775 | if (ret < 0) { |
| 2776 | /* | ||
| 2777 | SUS require strange return value to mlock | ||
| 2778 | - invalid addr generate to ENOMEM. | ||
| 2779 | - out of memory should generate EAGAIN. | ||
| 2780 | */ | ||
| 2781 | if (ret == -EFAULT) | ||
| 2782 | ret = -ENOMEM; | ||
| 2783 | else if (ret == -ENOMEM) | ||
| 2784 | ret = -EAGAIN; | ||
| 2759 | return ret; | 2785 | return ret; |
| 2760 | return ret == len ? 0 : -1; | 2786 | } |
| 2787 | return ret == len ? 0 : -ENOMEM; | ||
| 2761 | } | 2788 | } |
| 2762 | 2789 | ||
| 2763 | #if !defined(__HAVE_ARCH_GATE_AREA) | 2790 | #if !defined(__HAVE_ARCH_GATE_AREA) |
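
The hunk above maps `get_user_pages()` failures onto the errno values SUS/POSIX mandates for `mlock()` (bad address range becomes `ENOMEM`, resource shortage becomes `EAGAIN`) instead of the old bare `-1`. An illustrative userspace check of the visible behaviour, assuming nothing is mapped at the probed address:

```c
#include <errno.h>
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        /* locking an unmapped, page-aligned range should now fail with ENOMEM */
        if (mlock((void *)0x1000, 4096) != 0)
                printf("mlock failed: errno=%d (%d expected)\n", errno, ENOMEM);
        else
                printf("unexpectedly mapped; pick another address\n");
        return 0;
}
```
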
| @@ -2804,6 +2831,86 @@ int in_gate_area_no_task(unsigned long addr) | |||
| 2804 | 2831 | ||
| 2805 | #endif /* __HAVE_ARCH_GATE_AREA */ | 2832 | #endif /* __HAVE_ARCH_GATE_AREA */ |
| 2806 | 2833 | ||
| 2834 | #ifdef CONFIG_HAVE_IOREMAP_PROT | ||
| 2835 | static resource_size_t follow_phys(struct vm_area_struct *vma, | ||
| 2836 | unsigned long address, unsigned int flags, | ||
| 2837 | unsigned long *prot) | ||
| 2838 | { | ||
| 2839 | pgd_t *pgd; | ||
| 2840 | pud_t *pud; | ||
| 2841 | pmd_t *pmd; | ||
| 2842 | pte_t *ptep, pte; | ||
| 2843 | spinlock_t *ptl; | ||
| 2844 | resource_size_t phys_addr = 0; | ||
| 2845 | struct mm_struct *mm = vma->vm_mm; | ||
| 2846 | |||
| 2847 | VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP))); | ||
| 2848 | |||
| 2849 | pgd = pgd_offset(mm, address); | ||
| 2850 | if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) | ||
| 2851 | goto no_page_table; | ||
| 2852 | |||
| 2853 | pud = pud_offset(pgd, address); | ||
| 2854 | if (pud_none(*pud) || unlikely(pud_bad(*pud))) | ||
| 2855 | goto no_page_table; | ||
| 2856 | |||
| 2857 | pmd = pmd_offset(pud, address); | ||
| 2858 | if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) | ||
| 2859 | goto no_page_table; | ||
| 2860 | |||
| 2861 | /* We cannot handle huge page PFN maps. Luckily they don't exist. */ | ||
| 2862 | if (pmd_huge(*pmd)) | ||
| 2863 | goto no_page_table; | ||
| 2864 | |||
| 2865 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); | ||
| 2866 | if (!ptep) | ||
| 2867 | goto out; | ||
| 2868 | |||
| 2869 | pte = *ptep; | ||
| 2870 | if (!pte_present(pte)) | ||
| 2871 | goto unlock; | ||
| 2872 | if ((flags & FOLL_WRITE) && !pte_write(pte)) | ||
| 2873 | goto unlock; | ||
| 2874 | phys_addr = pte_pfn(pte); | ||
| 2875 | phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */ | ||
| 2876 | |||
| 2877 | *prot = pgprot_val(pte_pgprot(pte)); | ||
| 2878 | |||
| 2879 | unlock: | ||
| 2880 | pte_unmap_unlock(ptep, ptl); | ||
| 2881 | out: | ||
| 2882 | return phys_addr; | ||
| 2883 | no_page_table: | ||
| 2884 | return 0; | ||
| 2885 | } | ||
| 2886 | |||
| 2887 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, | ||
| 2888 | void *buf, int len, int write) | ||
| 2889 | { | ||
| 2890 | resource_size_t phys_addr; | ||
| 2891 | unsigned long prot = 0; | ||
| 2892 | void *maddr; | ||
| 2893 | int offset = addr & (PAGE_SIZE-1); | ||
| 2894 | |||
| 2895 | if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) | ||
| 2896 | return -EINVAL; | ||
| 2897 | |||
| 2898 | phys_addr = follow_phys(vma, addr, write, &prot); | ||
| 2899 | |||
| 2900 | if (!phys_addr) | ||
| 2901 | return -EINVAL; | ||
| 2902 | |||
| 2903 | maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); | ||
| 2904 | if (write) | ||
| 2905 | memcpy_toio(maddr + offset, buf, len); | ||
| 2906 | else | ||
| 2907 | memcpy_fromio(buf, maddr + offset, len); | ||
| 2908 | iounmap(maddr); | ||
| 2909 | |||
| 2910 | return len; | ||
| 2911 | } | ||
| 2912 | #endif | ||
| 2913 | |||
| 2807 | /* | 2914 | /* |
| 2808 | * Access another process' address space. | 2915 | * Access another process' address space. |
| 2809 | * Source/target buffer must be kernel space, | 2916 | * Source/target buffer must be kernel space, |
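
`generic_access_phys()` (available when the architecture selects `CONFIG_HAVE_IOREMAP_PROT`) lets `access_process_vm()` reach into `VM_IO`/`VM_PFNMAP` mappings by re-ioremapping the underlying physical page. A hedged sketch of how a driver could opt in by wiring it into the new `->access` method; `my_base_pfn` and the other `my_*` names are hypothetical, and this mirrors how /dev/mem-style mappings can be wired up rather than quoting code from this patch:

```c
#include <linux/mm.h>
#include <linux/fs.h>

static unsigned long my_base_pfn;       /* hypothetical: physical base >> PAGE_SHIFT */

static struct vm_operations_struct my_mmio_vm_ops = {
        .access = generic_access_phys,  /* enables ptrace and /proc/<pid>/mem access */
};

static int my_mmio_mmap(struct file *file, struct vm_area_struct *vma)
{
        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
        vma->vm_ops = &my_mmio_vm_ops;

        /* sets VM_IO | VM_PFNMAP on the vma */
        return io_remap_pfn_range(vma, vma->vm_start,
                                  my_base_pfn + vma->vm_pgoff,
                                  vma->vm_end - vma->vm_start,
                                  vma->vm_page_prot);
}
```
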
| @@ -2813,7 +2920,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
| 2813 | { | 2920 | { |
| 2814 | struct mm_struct *mm; | 2921 | struct mm_struct *mm; |
| 2815 | struct vm_area_struct *vma; | 2922 | struct vm_area_struct *vma; |
| 2816 | struct page *page; | ||
| 2817 | void *old_buf = buf; | 2923 | void *old_buf = buf; |
| 2818 | 2924 | ||
| 2819 | mm = get_task_mm(tsk); | 2925 | mm = get_task_mm(tsk); |
| @@ -2825,28 +2931,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
| 2825 | while (len) { | 2931 | while (len) { |
| 2826 | int bytes, ret, offset; | 2932 | int bytes, ret, offset; |
| 2827 | void *maddr; | 2933 | void *maddr; |
| 2934 | struct page *page = NULL; | ||
| 2828 | 2935 | ||
| 2829 | ret = get_user_pages(tsk, mm, addr, 1, | 2936 | ret = get_user_pages(tsk, mm, addr, 1, |
| 2830 | write, 1, &page, &vma); | 2937 | write, 1, &page, &vma); |
| 2831 | if (ret <= 0) | 2938 | if (ret <= 0) { |
| 2832 | break; | 2939 | /* |
| 2833 | 2940 | * Check if this is a VM_IO | VM_PFNMAP VMA, which | |
| 2834 | bytes = len; | 2941 | * we can access using slightly different code. |
| 2835 | offset = addr & (PAGE_SIZE-1); | 2942 | */ |
| 2836 | if (bytes > PAGE_SIZE-offset) | 2943 | #ifdef CONFIG_HAVE_IOREMAP_PROT |
| 2837 | bytes = PAGE_SIZE-offset; | 2944 | vma = find_vma(mm, addr); |
| 2838 | 2945 | if (!vma) | |
| 2839 | maddr = kmap(page); | 2946 | break; |
| 2840 | if (write) { | 2947 | if (vma->vm_ops && vma->vm_ops->access) |
| 2841 | copy_to_user_page(vma, page, addr, | 2948 | ret = vma->vm_ops->access(vma, addr, buf, |
| 2842 | maddr + offset, buf, bytes); | 2949 | len, write); |
| 2843 | set_page_dirty_lock(page); | 2950 | if (ret <= 0) |
| 2951 | #endif | ||
| 2952 | break; | ||
| 2953 | bytes = ret; | ||
| 2844 | } else { | 2954 | } else { |
| 2845 | copy_from_user_page(vma, page, addr, | 2955 | bytes = len; |
| 2846 | buf, maddr + offset, bytes); | 2956 | offset = addr & (PAGE_SIZE-1); |
| 2957 | if (bytes > PAGE_SIZE-offset) | ||
| 2958 | bytes = PAGE_SIZE-offset; | ||
| 2959 | |||
| 2960 | maddr = kmap(page); | ||
| 2961 | if (write) { | ||
| 2962 | copy_to_user_page(vma, page, addr, | ||
| 2963 | maddr + offset, buf, bytes); | ||
| 2964 | set_page_dirty_lock(page); | ||
| 2965 | } else { | ||
| 2966 | copy_from_user_page(vma, page, addr, | ||
| 2967 | buf, maddr + offset, bytes); | ||
| 2968 | } | ||
| 2969 | kunmap(page); | ||
| 2970 | page_cache_release(page); | ||
| 2847 | } | 2971 | } |
| 2848 | kunmap(page); | ||
| 2849 | page_cache_release(page); | ||
| 2850 | len -= bytes; | 2972 | len -= bytes; |
| 2851 | buf += bytes; | 2973 | buf += bytes; |
| 2852 | addr += bytes; | 2974 | addr += bytes; |
