diff options
| -rw-r--r-- | Documentation/filesystems/Locking | 7 | ||||
| -rw-r--r-- | arch/Kconfig | 3 | ||||
| -rw-r--r-- | arch/x86/Kconfig | 1 | ||||
| -rw-r--r-- | arch/x86/mm/ioremap.c | 8 | ||||
| -rw-r--r-- | include/asm-x86/io_32.h | 2 | ||||
| -rw-r--r-- | include/asm-x86/io_64.h | 2 | ||||
| -rw-r--r-- | include/linux/mm.h | 8 | ||||
| -rw-r--r-- | mm/memory.c | 131 |
8 files changed, 144 insertions, 18 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8b22d7d8b991..680fb566b928 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking | |||
| @@ -510,6 +510,7 @@ prototypes: | |||
| 510 | void (*close)(struct vm_area_struct*); | 510 | void (*close)(struct vm_area_struct*); |
| 511 | int (*fault)(struct vm_area_struct*, struct vm_fault *); | 511 | int (*fault)(struct vm_area_struct*, struct vm_fault *); |
| 512 | int (*page_mkwrite)(struct vm_area_struct *, struct page *); | 512 | int (*page_mkwrite)(struct vm_area_struct *, struct page *); |
| 513 | int (*access)(struct vm_area_struct *, unsigned long, void*, int, int); | ||
| 513 | 514 | ||
| 514 | locking rules: | 515 | locking rules: |
| 515 | BKL mmap_sem PageLocked(page) | 516 | BKL mmap_sem PageLocked(page) |
| @@ -517,6 +518,7 @@ open: no yes | |||
| 517 | close: no yes | 518 | close: no yes |
| 518 | fault: no yes | 519 | fault: no yes |
| 519 | page_mkwrite: no yes no | 520 | page_mkwrite: no yes no |
| 521 | access: no yes | ||
| 520 | 522 | ||
| 521 | ->page_mkwrite() is called when a previously read-only page is | 523 | ->page_mkwrite() is called when a previously read-only page is |
| 522 | about to become writeable. The file system is responsible for | 524 | about to become writeable. The file system is responsible for |
| @@ -525,6 +527,11 @@ taking to lock out truncate, the page range should be verified to be | |||
| 525 | within i_size. The page mapping should also be checked that it is not | 527 | within i_size. The page mapping should also be checked that it is not |
| 526 | NULL. | 528 | NULL. |
| 527 | 529 | ||
| 530 | ->access() is called when get_user_pages() fails in | ||
| 531 | access_process_vm(), typically used to debug a process through | ||
| 532 | /proc/pid/mem or ptrace. This function is needed only for | ||
| 533 | VM_IO | VM_PFNMAP VMAs. | ||
| 534 | |||
| 528 | ================================================================================ | 535 | ================================================================================ |
| 529 | Dubious stuff | 536 | Dubious stuff |
| 530 | 537 | ||
diff --git a/arch/Kconfig b/arch/Kconfig index 4d5ebbc1e72b..6093c0be58b0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
| @@ -31,6 +31,9 @@ config KRETPROBES | |||
| 31 | def_bool y | 31 | def_bool y |
| 32 | depends on KPROBES && HAVE_KRETPROBES | 32 | depends on KPROBES && HAVE_KRETPROBES |
| 33 | 33 | ||
| 34 | config HAVE_IOREMAP_PROT | ||
| 35 | def_bool n | ||
| 36 | |||
| 34 | config HAVE_KPROBES | 37 | config HAVE_KPROBES |
| 35 | def_bool n | 38 | def_bool n |
| 36 | 39 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 03980cb04291..b2ddfcf01728 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
| @@ -21,6 +21,7 @@ config X86 | |||
| 21 | select HAVE_UNSTABLE_SCHED_CLOCK | 21 | select HAVE_UNSTABLE_SCHED_CLOCK |
| 22 | select HAVE_IDE | 22 | select HAVE_IDE |
| 23 | select HAVE_OPROFILE | 23 | select HAVE_OPROFILE |
| 24 | select HAVE_IOREMAP_PROT | ||
| 24 | select HAVE_KPROBES | 25 | select HAVE_KPROBES |
| 25 | select HAVE_KRETPROBES | 26 | select HAVE_KRETPROBES |
| 26 | select HAVE_DYNAMIC_FTRACE | 27 | select HAVE_DYNAMIC_FTRACE |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 24c1d3c30186..016f335bbeea 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
| @@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, | |||
| 330 | return (void __iomem *)ret; | 330 | return (void __iomem *)ret; |
| 331 | } | 331 | } |
| 332 | 332 | ||
| 333 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | ||
| 334 | unsigned long prot_val) | ||
| 335 | { | ||
| 336 | return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), | ||
| 337 | __builtin_return_address(0)); | ||
| 338 | } | ||
| 339 | EXPORT_SYMBOL(ioremap_prot); | ||
| 340 | |||
| 333 | /** | 341 | /** |
| 334 | * iounmap - Free an IO remapping | 342 | * iounmap - Free an IO remapping |
| 335 | * @addr: virtual address from ioremap_* | 343 | * @addr: virtual address from ioremap_* |
diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 4df44ed54077..e876d89ac156 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h | |||
| @@ -110,6 +110,8 @@ static inline void *phys_to_virt(unsigned long address) | |||
| 110 | */ | 110 | */ |
| 111 | extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); | 111 | extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); |
| 112 | extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); | 112 | extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); |
| 113 | extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, | ||
| 114 | unsigned long prot_val); | ||
| 113 | 115 | ||
| 114 | /* | 116 | /* |
| 115 | * The default ioremap() behavior is non-cached: | 117 | * The default ioremap() behavior is non-cached: |
diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index ddd8058a5026..22995c5c5adc 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h | |||
| @@ -175,6 +175,8 @@ extern void early_iounmap(void *addr, unsigned long size); | |||
| 175 | */ | 175 | */ |
| 176 | extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); | 176 | extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); |
| 177 | extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); | 177 | extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); |
| 178 | extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, | ||
| 179 | unsigned long prot_val); | ||
| 178 | 180 | ||
| 179 | /* | 181 | /* |
| 180 | * The default ioremap() behavior is non-cached: | 182 | * The default ioremap() behavior is non-cached: |
diff --git a/include/linux/mm.h b/include/linux/mm.h index eb815cfc1b35..5c7f8f64f70e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
| @@ -170,6 +170,12 @@ struct vm_operations_struct { | |||
| 170 | /* notification that a previously read-only page is about to become | 170 | /* notification that a previously read-only page is about to become |
| 171 | * writable, if an error is returned it will cause a SIGBUS */ | 171 | * writable, if an error is returned it will cause a SIGBUS */ |
| 172 | int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); | 172 | int (*page_mkwrite)(struct vm_area_struct *vma, struct page *page); |
| 173 | |||
| 174 | /* called by access_process_vm when get_user_pages() fails, typically | ||
| 175 | * for use by special VMAs that can switch between memory and hardware | ||
| 176 | */ | ||
| 177 | int (*access)(struct vm_area_struct *vma, unsigned long addr, | ||
| 178 | void *buf, int len, int write); | ||
| 173 | #ifdef CONFIG_NUMA | 179 | #ifdef CONFIG_NUMA |
| 174 | /* | 180 | /* |
| 175 | * set_policy() op must add a reference to any non-NULL @new mempolicy | 181 | * set_policy() op must add a reference to any non-NULL @new mempolicy |
| @@ -771,6 +777,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
| 771 | struct vm_area_struct *vma); | 777 | struct vm_area_struct *vma); |
| 772 | void unmap_mapping_range(struct address_space *mapping, | 778 | void unmap_mapping_range(struct address_space *mapping, |
| 773 | loff_t const holebegin, loff_t const holelen, int even_cows); | 779 | loff_t const holebegin, loff_t const holelen, int even_cows); |
| 780 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, | ||
| 781 | void *buf, int len, int write); | ||
| 774 | 782 | ||
| 775 | static inline void unmap_shared_mapping_range(struct address_space *mapping, | 783 | static inline void unmap_shared_mapping_range(struct address_space *mapping, |
| 776 | loff_t const holebegin, loff_t const holelen) | 784 | loff_t const holebegin, loff_t const holelen) |
diff --git a/mm/memory.c b/mm/memory.c index 46dbed4b7446..87350321e66f 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
| @@ -2751,6 +2751,86 @@ int in_gate_area_no_task(unsigned long addr) | |||
| 2751 | 2751 | ||
| 2752 | #endif /* __HAVE_ARCH_GATE_AREA */ | 2752 | #endif /* __HAVE_ARCH_GATE_AREA */ |
| 2753 | 2753 | ||
| 2754 | #ifdef CONFIG_HAVE_IOREMAP_PROT | ||
| 2755 | static resource_size_t follow_phys(struct vm_area_struct *vma, | ||
| 2756 | unsigned long address, unsigned int flags, | ||
| 2757 | unsigned long *prot) | ||
| 2758 | { | ||
| 2759 | pgd_t *pgd; | ||
| 2760 | pud_t *pud; | ||
| 2761 | pmd_t *pmd; | ||
| 2762 | pte_t *ptep, pte; | ||
| 2763 | spinlock_t *ptl; | ||
| 2764 | resource_size_t phys_addr = 0; | ||
| 2765 | struct mm_struct *mm = vma->vm_mm; | ||
| 2766 | |||
| 2767 | VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP))); | ||
| 2768 | |||
| 2769 | pgd = pgd_offset(mm, address); | ||
| 2770 | if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) | ||
| 2771 | goto no_page_table; | ||
| 2772 | |||
| 2773 | pud = pud_offset(pgd, address); | ||
| 2774 | if (pud_none(*pud) || unlikely(pud_bad(*pud))) | ||
| 2775 | goto no_page_table; | ||
| 2776 | |||
| 2777 | pmd = pmd_offset(pud, address); | ||
| 2778 | if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) | ||
| 2779 | goto no_page_table; | ||
| 2780 | |||
| 2781 | /* We cannot handle huge page PFN maps. Luckily they don't exist. */ | ||
| 2782 | if (pmd_huge(*pmd)) | ||
| 2783 | goto no_page_table; | ||
| 2784 | |||
| 2785 | ptep = pte_offset_map_lock(mm, pmd, address, &ptl); | ||
| 2786 | if (!ptep) | ||
| 2787 | goto out; | ||
| 2788 | |||
| 2789 | pte = *ptep; | ||
| 2790 | if (!pte_present(pte)) | ||
| 2791 | goto unlock; | ||
| 2792 | if ((flags & FOLL_WRITE) && !pte_write(pte)) | ||
| 2793 | goto unlock; | ||
| 2794 | phys_addr = pte_pfn(pte); | ||
| 2795 | phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */ | ||
| 2796 | |||
| 2797 | *prot = pgprot_val(pte_pgprot(pte)); | ||
| 2798 | |||
| 2799 | unlock: | ||
| 2800 | pte_unmap_unlock(ptep, ptl); | ||
| 2801 | out: | ||
| 2802 | return phys_addr; | ||
| 2803 | no_page_table: | ||
| 2804 | return 0; | ||
| 2805 | } | ||
| 2806 | |||
| 2807 | int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, | ||
| 2808 | void *buf, int len, int write) | ||
| 2809 | { | ||
| 2810 | resource_size_t phys_addr; | ||
| 2811 | unsigned long prot = 0; | ||
| 2812 | void *maddr; | ||
| 2813 | int offset = addr & (PAGE_SIZE-1); | ||
| 2814 | |||
| 2815 | if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) | ||
| 2816 | return -EINVAL; | ||
| 2817 | |||
| 2818 | phys_addr = follow_phys(vma, addr, write, &prot); | ||
| 2819 | |||
| 2820 | if (!phys_addr) | ||
| 2821 | return -EINVAL; | ||
| 2822 | |||
| 2823 | maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); | ||
| 2824 | if (write) | ||
| 2825 | memcpy_toio(maddr + offset, buf, len); | ||
| 2826 | else | ||
| 2827 | memcpy_fromio(buf, maddr + offset, len); | ||
| 2828 | iounmap(maddr); | ||
| 2829 | |||
| 2830 | return len; | ||
| 2831 | } | ||
| 2832 | #endif | ||
| 2833 | |||
| 2754 | /* | 2834 | /* |
| 2755 | * Access another process' address space. | 2835 | * Access another process' address space. |
| 2756 | * Source/target buffer must be kernel space, | 2836 | * Source/target buffer must be kernel space, |
| @@ -2760,7 +2840,6 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
| 2760 | { | 2840 | { |
| 2761 | struct mm_struct *mm; | 2841 | struct mm_struct *mm; |
| 2762 | struct vm_area_struct *vma; | 2842 | struct vm_area_struct *vma; |
| 2763 | struct page *page; | ||
| 2764 | void *old_buf = buf; | 2843 | void *old_buf = buf; |
| 2765 | 2844 | ||
| 2766 | mm = get_task_mm(tsk); | 2845 | mm = get_task_mm(tsk); |
| @@ -2772,28 +2851,44 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
| 2772 | while (len) { | 2851 | while (len) { |
| 2773 | int bytes, ret, offset; | 2852 | int bytes, ret, offset; |
| 2774 | void *maddr; | 2853 | void *maddr; |
| 2854 | struct page *page = NULL; | ||
| 2775 | 2855 | ||
| 2776 | ret = get_user_pages(tsk, mm, addr, 1, | 2856 | ret = get_user_pages(tsk, mm, addr, 1, |
| 2777 | write, 1, &page, &vma); | 2857 | write, 1, &page, &vma); |
| 2778 | if (ret <= 0) | 2858 | if (ret <= 0) { |
| 2779 | break; | 2859 | /* |
| 2780 | 2860 | * Check if this is a VM_IO | VM_PFNMAP VMA, which | |
| 2781 | bytes = len; | 2861 | * we can access using slightly different code. |
| 2782 | offset = addr & (PAGE_SIZE-1); | 2862 | */ |
| 2783 | if (bytes > PAGE_SIZE-offset) | 2863 | #ifdef CONFIG_HAVE_IOREMAP_PROT |
| 2784 | bytes = PAGE_SIZE-offset; | 2864 | vma = find_vma(mm, addr); |
| 2785 | 2865 | if (!vma) | |
| 2786 | maddr = kmap(page); | 2866 | break; |
| 2787 | if (write) { | 2867 | if (vma->vm_ops && vma->vm_ops->access) |
| 2788 | copy_to_user_page(vma, page, addr, | 2868 | ret = vma->vm_ops->access(vma, addr, buf, |
| 2789 | maddr + offset, buf, bytes); | 2869 | len, write); |
| 2790 | set_page_dirty_lock(page); | 2870 | if (ret <= 0) |
| 2871 | #endif | ||
| 2872 | break; | ||
| 2873 | bytes = ret; | ||
| 2791 | } else { | 2874 | } else { |
| 2792 | copy_from_user_page(vma, page, addr, | 2875 | bytes = len; |
| 2793 | buf, maddr + offset, bytes); | 2876 | offset = addr & (PAGE_SIZE-1); |
| 2877 | if (bytes > PAGE_SIZE-offset) | ||
| 2878 | bytes = PAGE_SIZE-offset; | ||
| 2879 | |||
| 2880 | maddr = kmap(page); | ||
| 2881 | if (write) { | ||
| 2882 | copy_to_user_page(vma, page, addr, | ||
| 2883 | maddr + offset, buf, bytes); | ||
| 2884 | set_page_dirty_lock(page); | ||
| 2885 | } else { | ||
| 2886 | copy_from_user_page(vma, page, addr, | ||
| 2887 | buf, maddr + offset, bytes); | ||
| 2888 | } | ||
| 2889 | kunmap(page); | ||
| 2890 | page_cache_release(page); | ||
| 2794 | } | 2891 | } |
| 2795 | kunmap(page); | ||
| 2796 | page_cache_release(page); | ||
| 2797 | len -= bytes; | 2892 | len -= bytes; |
| 2798 | buf += bytes; | 2893 | buf += bytes; |
| 2799 | addr += bytes; | 2894 | addr += bytes; |
