diff options
| -rw-r--r-- | arch/arm/Kconfig | 16 | ||||
| -rw-r--r-- | arch/arm/include/asm/uaccess.h | 2 | ||||
| -rw-r--r-- | arch/arm/lib/Makefile | 3 | ||||
| -rw-r--r-- | arch/arm/lib/clear_user.S | 3 | ||||
| -rw-r--r-- | arch/arm/lib/copy_to_user.S | 3 | ||||
| -rw-r--r-- | arch/arm/lib/uaccess_with_memcpy.c | 228 |
6 files changed, 253 insertions, 2 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c857111ab964..3473f8b8ede6 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig | |||
| @@ -1091,6 +1091,22 @@ config ALIGNMENT_TRAP | |||
| 1091 | correct operation of some network protocols. With an IP-only | 1091 | correct operation of some network protocols. With an IP-only |
| 1092 | configuration it is safe to say N, otherwise say Y. | 1092 | configuration it is safe to say N, otherwise say Y. |
| 1093 | 1093 | ||
| 1094 | config UACCESS_WITH_MEMCPY | ||
| 1095 | bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user() (EXPERIMENTAL)" | ||
| 1096 | depends on MMU && EXPERIMENTAL | ||
| 1097 | default y if CPU_FEROCEON | ||
| 1098 | help | ||
| 1099 | Implement faster copy_to_user and clear_user methods for CPU | ||
| 1100 | cores where an 8-word STM instruction gives significantly higher | ||
| 1101 | memory write throughput than a sequence of individual 32-bit stores. | ||
| 1102 | |||
| 1103 | A possible side effect is a slight increase in scheduling latency | ||
| 1104 | between threads sharing the same address space if they invoke | ||
| 1105 | such copy operations with large buffers. | ||
| 1106 | |||
| 1107 | However, if the CPU data cache is using a write-allocate mode, | ||
| 1108 | this option is unlikely to provide any performance gain. | ||
| 1109 | |||
| 1094 | endmenu | 1110 | endmenu |
| 1095 | 1111 | ||
| 1096 | menu "Boot options" | 1112 | menu "Boot options" |
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 7897464e0c24..0da9bc9b3b1d 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h | |||
| @@ -386,7 +386,9 @@ do { \ | |||
| 386 | #ifdef CONFIG_MMU | 386 | #ifdef CONFIG_MMU |
| 387 | extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); | 387 | extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); |
| 388 | extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); | 388 | extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); |
| 389 | extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); | ||
| 389 | extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); | 390 | extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); |
| 391 | extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned long n); | ||
| 390 | #else | 392 | #else |
| 391 | #define __copy_from_user(to,from,n) (memcpy(to, (void __force *)from, n), 0) | 393 | #define __copy_from_user(to,from,n) (memcpy(to, (void __force *)from, n), 0) |
| 392 | #define __copy_to_user(to,from,n) (memcpy((void __force *)to, from, n), 0) | 394 | #define __copy_to_user(to,from,n) (memcpy((void __force *)to, from, n), 0) |
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 866f84a586ff..030ba7219f48 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile | |||
| @@ -29,6 +29,9 @@ else | |||
| 29 | endif | 29 | endif |
| 30 | endif | 30 | endif |
| 31 | 31 | ||
| 32 | # using lib_ here won't override already available weak symbols | ||
| 33 | obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o | ||
| 34 | |||
| 32 | lib-$(CONFIG_MMU) += $(mmu-y) | 35 | lib-$(CONFIG_MMU) += $(mmu-y) |
| 33 | 36 | ||
| 34 | ifeq ($(CONFIG_CPU_32v3),y) | 37 | ifeq ($(CONFIG_CPU_32v3),y) |
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 4d6bc71231f3..844f56785ebc 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S | |||
| @@ -18,7 +18,8 @@ | |||
| 18 | * : sz - number of bytes to clear | 18 | * : sz - number of bytes to clear |
| 19 | * Returns : number of bytes NOT cleared | 19 | * Returns : number of bytes NOT cleared |
| 20 | */ | 20 | */ |
| 21 | ENTRY(__clear_user) | 21 | ENTRY(__clear_user_std) |
| 22 | WEAK(__clear_user) | ||
| 22 | stmfd sp!, {r1, lr} | 23 | stmfd sp!, {r1, lr} |
| 23 | mov r2, #0 | 24 | mov r2, #0 |
| 24 | cmp r1, #4 | 25 | cmp r1, #4 |
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 22f968bbdffd..878820f0a320 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S | |||
| @@ -86,7 +86,8 @@ | |||
| 86 | 86 | ||
| 87 | .text | 87 | .text |
| 88 | 88 | ||
| 89 | ENTRY(__copy_to_user) | 89 | ENTRY(__copy_to_user_std) |
| 90 | WEAK(__copy_to_user) | ||
| 90 | 91 | ||
| 91 | #include "copy_template.S" | 92 | #include "copy_template.S" |
| 92 | 93 | ||
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c new file mode 100644 index 000000000000..6b967ffb6552 --- /dev/null +++ b/arch/arm/lib/uaccess_with_memcpy.c | |||
| @@ -0,0 +1,228 @@ | |||
| 1 | /* | ||
| 2 | * linux/arch/arm/lib/uaccess_with_memcpy.c | ||
| 3 | * | ||
| 4 | * Written by: Lennert Buytenhek and Nicolas Pitre | ||
| 5 | * Copyright (C) 2009 Marvell Semiconductor | ||
| 6 | * | ||
| 7 | * This program is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License version 2 as | ||
| 9 | * published by the Free Software Foundation. | ||
| 10 | */ | ||
| 11 | |||
| 12 | #include <linux/kernel.h> | ||
| 13 | #include <linux/ctype.h> | ||
| 14 | #include <linux/uaccess.h> | ||
| 15 | #include <linux/rwsem.h> | ||
| 16 | #include <linux/mm.h> | ||
| 17 | #include <linux/sched.h> | ||
| 18 | #include <linux/hardirq.h> /* for in_atomic() */ | ||
| 19 | #include <asm/current.h> | ||
| 20 | #include <asm/page.h> | ||
| 21 | |||
/*
 * Walk current->mm's page tables for the user address _addr and, if a
 * present, young, writable and dirty pte is already installed, return
 * it mapped and locked so that a subsequent kernel-side write to the
 * page cannot fault or race with the pte being torn down.
 *
 * Returns 1 on success with *ptep/*ptlp set; the caller must release
 * them with pte_unmap_unlock().  Returns 0 when the mapping is absent
 * or not yet directly writable (the caller then faults the page in via
 * __put_user() and retries).
 */
static int
pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
{
	unsigned long addr = (unsigned long)_addr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;

	pgd = pgd_offset(current->mm, addr);
	if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd)))
		return 0;

	pmd = pmd_offset(pgd, addr);
	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
		return 0;

	/* Map the pte and take its page table lock. */
	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
	/*
	 * Insist on young+dirty as well as present+writable so that no
	 * access/dirty bit bookkeeping (i.e. no fault) can be needed
	 * while the kernel writes to the page.
	 */
	if (unlikely(!pte_present(*pte) || !pte_young(*pte) ||
	    !pte_write(*pte) || !pte_dirty(*pte))) {
		pte_unmap_unlock(pte, ptl);
		return 0;
	}

	*ptep = pte;
	*ptlp = ptl;

	return 1;
}
| 51 | |||
/*
 * memcpy()-based implementation of __copy_to_user(): pin each
 * destination page's pte under its page table lock so the stores
 * cannot fault, then use the kernel memcpy() for the actual data
 * movement.  Returns the number of bytes NOT copied, matching the
 * __copy_to_user() contract.
 */
static unsigned long noinline
__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
{
	int atomic;

	/* With set_fs(KERNEL_DS) the "user" pointer is a kernel address. */
	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
		memcpy((void *)to, from, n);
		return 0;
	}

	/* the mmap semaphore is taken only if not in an atomic context */
	atomic = in_atomic();

	if (!atomic)
		down_read(&current->mm->mmap_sem);
	while (n) {
		pte_t *pte;
		spinlock_t *ptl;
		int tocopy;

		/*
		 * Page not yet present/writable/dirty: drop the semaphore
		 * and let __put_user() fault it in (setting the young and
		 * dirty bits); a failure there is a genuine EFAULT.
		 */
		while (!pin_page_for_write(to, &pte, &ptl)) {
			if (!atomic)
				up_read(&current->mm->mmap_sem);
			if (__put_user(0, (char __user *)to))
				goto out;
			if (!atomic)
				down_read(&current->mm->mmap_sem);
		}

		/* Number of bytes up to the end of the current page. */
		tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1;
		if (tocopy > n)
			tocopy = n;

		memcpy((void *)to, from, tocopy);
		to += tocopy;
		from += tocopy;
		n -= tocopy;

		pte_unmap_unlock(pte, ptl);
	}
	if (!atomic)
		up_read(&current->mm->mmap_sem);

out:
	return n;
}
| 98 | |||
| 99 | unsigned long | ||
| 100 | __copy_to_user(void __user *to, const void *from, unsigned long n) | ||
| 101 | { | ||
| 102 | /* | ||
| 103 | * This test is stubbed out of the main function above to keep | ||
| 104 | * the overhead for small copies low by avoiding a large | ||
| 105 | * register dump on the stack just to reload them right away. | ||
| 106 | * With frame pointer disabled, tail call optimization kicks in | ||
| 107 | * as well making this test almost invisible. | ||
| 108 | */ | ||
| 109 | if (n < 64) | ||
| 110 | return __copy_to_user_std(to, from, n); | ||
| 111 | return __copy_to_user_memcpy(to, from, n); | ||
| 112 | } | ||
| 113 | |||
/*
 * memset()-based implementation of __clear_user(): pin each target
 * page's pte so the stores cannot fault, then zero the bytes with the
 * kernel memset().  Returns the number of bytes NOT cleared.
 *
 * NOTE(review): unlike __copy_to_user_memcpy() above, this path takes
 * mmap_sem unconditionally — it appears to assume clear_user() is never
 * invoked from atomic context; confirm against callers.
 */
static unsigned long noinline
__clear_user_memset(void __user *addr, unsigned long n)
{
	/* With set_fs(KERNEL_DS) the "user" pointer is a kernel address. */
	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
		memset((void *)addr, 0, n);
		return 0;
	}

	down_read(&current->mm->mmap_sem);
	while (n) {
		pte_t *pte;
		spinlock_t *ptl;
		int tocopy;

		/*
		 * Page not yet present/writable/dirty: drop the semaphore
		 * and let __put_user() fault it in; a failure there is a
		 * genuine EFAULT.
		 */
		while (!pin_page_for_write(addr, &pte, &ptl)) {
			up_read(&current->mm->mmap_sem);
			if (__put_user(0, (char __user *)addr))
				goto out;
			down_read(&current->mm->mmap_sem);
		}

		/* Number of bytes up to the end of the current page. */
		tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1;
		if (tocopy > n)
			tocopy = n;

		memset((void *)addr, 0, tocopy);
		addr += tocopy;
		n -= tocopy;

		pte_unmap_unlock(pte, ptl);
	}
	up_read(&current->mm->mmap_sem);

out:
	return n;
}
| 150 | |||
| 151 | unsigned long __clear_user(void __user *addr, unsigned long n) | ||
| 152 | { | ||
| 153 | /* See rational for this in __copy_to_user() above. */ | ||
| 154 | if (n < 64) | ||
| 155 | return __clear_user_std(addr, n); | ||
| 156 | return __clear_user_memset(addr, n); | ||
| 157 | } | ||
| 158 | |||
#if 0

/*
 * This code is disabled by default, but kept around in case the chosen
 * thresholds need to be revalidated. Some overhead (small but still)
 * would be implied by a runtime determined variable threshold, and
 * so far the measurement on concerned targets didn't show a worthwhile
 * variation.
 *
 * Note that a fairly precise sched_clock() implementation is needed
 * for results to make some sense.
 */

#include <linux/vmalloc.h>

/*
 * Benchmark the memcpy()/memset() based routines against the standard
 * assembler implementations for sizes from PAGE_SIZE down to 4 bytes,
 * printing one line per size so the n < 64 cut-over above can be
 * revalidated.  Returns 0, -ENOMEM on allocation failure, or -EFAULT
 * if any of the copy/clear helpers reported uncopied bytes.
 */
static int __init test_size_threshold(void)
{
	struct page *src_page, *dst_page;
	void *user_ptr, *kernel_ptr;
	unsigned long long t0, t1, t2;
	int size, ret;

	ret = -ENOMEM;
	src_page = alloc_page(GFP_KERNEL);
	if (!src_page)
		goto no_src;
	dst_page = alloc_page(GFP_KERNEL);
	if (!dst_page)
		goto no_dst;
	kernel_ptr = page_address(src_page);
	/* Map dst_page with a user-style, write-only protection. */
	user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010));
	if (!user_ptr)
		goto no_vmap;

	/* warm up the src page dcache */
	ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE);

	for (size = PAGE_SIZE; size >= 4; size /= 2) {
		t0 = sched_clock();
		ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size);
		t1 = sched_clock();
		ret |= __copy_to_user_std(user_ptr, kernel_ptr, size);
		t2 = sched_clock();
		printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
	}

	for (size = PAGE_SIZE; size >= 4; size /= 2) {
		t0 = sched_clock();
		ret |= __clear_user_memset(user_ptr, size);
		t1 = sched_clock();
		ret |= __clear_user_std(user_ptr, size);
		t2 = sched_clock();
		printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
	}

	/* Any nonzero remainder from the helpers means a fault occurred. */
	if (ret)
		ret = -EFAULT;

	vunmap(user_ptr);
no_vmap:
	put_page(dst_page);
no_dst:
	put_page(src_page);
no_src:
	return ret;
}

subsys_initcall(test_size_threshold);

#endif
