about | summary | refs | log | tree | commit | diff | stats
path: root/arch/arm/lib
diff options
context:
space:
mode:
author: Nicolas Pitre <nico@cam.org> 2009-05-21 22:17:17 -0400
committer: Nicolas Pitre <nico@cam.org> 2009-05-29 22:38:33 -0400
commit: cb9dc92c0a1b76165c8c334402e27191084b2047 (patch)
tree: 92e7d43692bae2facd227a3c4c8757cf1a1754d0 /arch/arm/lib
parent: 39ec58f3fea47c242724109cc1da999f74810bbc (diff)
[ARM] lower overhead with alternative copy_to_user for small copies
Because the alternate copy_to_user implementation has a higher setup cost than the standard implementation, the size of the memory area to copy is tested and the standard implementation invoked instead when that size is too small. Still, that test is made after the processor has preserved a bunch of registers on the stack which have to be reloaded right away needlessly in that case, causing a measurable performance regression compared to plain usage of the standard implementation only.

To make the size test overhead negligible, let's factorize it out of the alternate copy_to_user function where it is clear to the compiler that no stack frame is needed. Thanks to CONFIG_ARM_UNWIND allowing for frame pointers to be disabled and tail call optimization to kick in, the overhead in the small copy case becomes only 3 assembly instructions.

A similar trick is applied to clear_user as well.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
Diffstat (limited to 'arch/arm/lib')
-rw-r--r-- arch/arm/lib/uaccess_with_memcpy.c 36
1 files changed, 27 insertions, 9 deletions
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index bf987b4a2571..92838e79654d 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -49,14 +49,11 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
49 return 1; 49 return 1;
50} 50}
51 51
52unsigned long 52static unsigned long noinline
53__copy_to_user(void __user *to, const void *from, unsigned long n) 53__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
54{ 54{
55 int atomic; 55 int atomic;
56 56
57 if (n < 1024)
58 return __copy_to_user_std(to, from, n);
59
60 if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { 57 if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
61 memcpy((void *)to, from, n); 58 memcpy((void *)to, from, n);
62 return 0; 59 return 0;
@@ -99,11 +96,24 @@ out:
99 return n; 96 return n;
100} 97}
101 98
102unsigned long __clear_user(void __user *addr, unsigned long n) 99unsigned long
100__copy_to_user(void __user *to, const void *from, unsigned long n)
101{
102 /*
103 * This test is stubbed out of the main function above to keep
104 * the overhead for small copies low by avoiding a large
105 * register dump on the stack just to reload them right away.
106 * With frame pointer disabled, tail call optimization kicks in
107 * as well making this test almost invisible.
108 */
109 if (n < 1024)
110 return __copy_to_user_std(to, from, n);
111 return __copy_to_user_memcpy(to, from, n);
112}
113
114static unsigned long noinline
115__clear_user_memset(void __user *addr, unsigned long n)
103{ 116{
104 if (n < 256)
105 return __clear_user_std(addr, n);
106
107 if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { 117 if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
108 memset((void *)addr, 0, n); 118 memset((void *)addr, 0, n);
109 return 0; 119 return 0;
@@ -137,3 +147,11 @@ unsigned long __clear_user(void __user *addr, unsigned long n)
137out: 147out:
138 return n; 148 return n;
139} 149}
150
151unsigned long __clear_user(void __user *addr, unsigned long n)
152{
153 /* See rational for this in __copy_to_user() above. */
154 if (n < 256)
155 return __clear_user_std(addr, n);
156 return __clear_user_memset(addr, n);
157}