diff options
author | Russell King <rmk@dyn-67.arm.linux.org.uk> | 2009-06-14 05:59:32 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-06-14 05:59:32 -0400 |
commit | 98797a241e28b787b84d308b867ec4c5fe7bbdf8 (patch) | |
tree | edd39824d91bd8cf33d8903e10ed337611483360 /arch | |
parent | ca8cbc8391cbd4d6e4304fc6b62682ed93d2b165 (diff) | |
parent | c626e3f5ca1d95ad2204d3128c26e7678714eb55 (diff) |
Merge branch 'copy_user' of git://git.marvell.com/orion into devel
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/Kconfig | 16 | ||||
-rw-r--r-- | arch/arm/include/asm/uaccess.h | 2 | ||||
-rw-r--r-- | arch/arm/lib/Makefile | 3 | ||||
-rw-r--r-- | arch/arm/lib/clear_user.S | 3 | ||||
-rw-r--r-- | arch/arm/lib/copy_to_user.S | 3 | ||||
-rw-r--r-- | arch/arm/lib/uaccess_with_memcpy.c | 228 |
6 files changed, 253 insertions, 2 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c857111ab96..3473f8b8ede 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig | |||
@@ -1091,6 +1091,22 @@ config ALIGNMENT_TRAP | |||
1091 | correct operation of some network protocols. With an IP-only | 1091 | correct operation of some network protocols. With an IP-only |
1092 | configuration it is safe to say N, otherwise say Y. | 1092 | configuration it is safe to say N, otherwise say Y. |
1093 | 1093 | ||
1094 | config UACCESS_WITH_MEMCPY | ||
1095 | bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user() (EXPERIMENTAL)" | ||
1096 | depends on MMU && EXPERIMENTAL | ||
1097 | default y if CPU_FEROCEON | ||
1098 | help | ||
1099 | Implement faster copy_to_user and clear_user methods for CPU | ||
1100 | cores where an 8-word STM instruction gives significantly higher | ||
1101 | memory write throughput than a sequence of individual 32bit stores. | ||
1102 | |||
1103 | A possible side effect is a slight increase in scheduling latency | ||
1104 | between threads sharing the same address space if they invoke | ||
1105 | such copy operations with large buffers. | ||
1106 | |||
1107 | However, if the CPU data cache is using a write-allocate mode, | ||
1108 | this option is unlikely to provide any performance gain. | ||
1109 | |||
1094 | endmenu | 1110 | endmenu |
1095 | 1111 | ||
1096 | menu "Boot options" | 1112 | menu "Boot options" |
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 7897464e0c2..0da9bc9b3b1 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h | |||
@@ -386,7 +386,9 @@ do { \ | |||
386 | #ifdef CONFIG_MMU | 386 | #ifdef CONFIG_MMU |
387 | extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); | 387 | extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); |
388 | extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); | 388 | extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); |
389 | extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); | ||
389 | extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); | 390 | extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); |
391 | extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned long n); | ||
390 | #else | 392 | #else |
391 | #define __copy_from_user(to,from,n) (memcpy(to, (void __force *)from, n), 0) | 393 | #define __copy_from_user(to,from,n) (memcpy(to, (void __force *)from, n), 0) |
392 | #define __copy_to_user(to,from,n) (memcpy((void __force *)to, from, n), 0) | 394 | #define __copy_to_user(to,from,n) (memcpy((void __force *)to, from, n), 0) |
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 866f84a586f..030ba7219f4 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile | |||
@@ -29,6 +29,9 @@ else | |||
29 | endif | 29 | endif |
30 | endif | 30 | endif |
31 | 31 | ||
32 | # using lib_ here won't override already available weak symbols | ||
33 | obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o | ||
34 | |||
32 | lib-$(CONFIG_MMU) += $(mmu-y) | 35 | lib-$(CONFIG_MMU) += $(mmu-y) |
33 | 36 | ||
34 | ifeq ($(CONFIG_CPU_32v3),y) | 37 | ifeq ($(CONFIG_CPU_32v3),y) |
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 4d6bc71231f..844f56785eb 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S | |||
@@ -18,7 +18,8 @@ | |||
18 | * : sz - number of bytes to clear | 18 | * : sz - number of bytes to clear |
19 | * Returns : number of bytes NOT cleared | 19 | * Returns : number of bytes NOT cleared |
20 | */ | 20 | */ |
21 | ENTRY(__clear_user) | 21 | ENTRY(__clear_user_std) |
22 | WEAK(__clear_user) | ||
22 | stmfd sp!, {r1, lr} | 23 | stmfd sp!, {r1, lr} |
23 | mov r2, #0 | 24 | mov r2, #0 |
24 | cmp r1, #4 | 25 | cmp r1, #4 |
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 22f968bbdff..878820f0a32 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S | |||
@@ -86,7 +86,8 @@ | |||
86 | 86 | ||
87 | .text | 87 | .text |
88 | 88 | ||
89 | ENTRY(__copy_to_user) | 89 | ENTRY(__copy_to_user_std) |
90 | WEAK(__copy_to_user) | ||
90 | 91 | ||
91 | #include "copy_template.S" | 92 | #include "copy_template.S" |
92 | 93 | ||
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c new file mode 100644 index 00000000000..6b967ffb655 --- /dev/null +++ b/arch/arm/lib/uaccess_with_memcpy.c | |||
@@ -0,0 +1,228 @@ | |||
1 | /* | ||
2 | * linux/arch/arm/lib/uaccess_with_memcpy.c | ||
3 | * | ||
4 | * Written by: Lennert Buytenhek and Nicolas Pitre | ||
5 | * Copyright (C) 2009 Marvell Semiconductor | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | */ | ||
11 | |||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/ctype.h> | ||
14 | #include <linux/uaccess.h> | ||
15 | #include <linux/rwsem.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/sched.h> | ||
18 | #include <linux/hardirq.h> /* for in_atomic() */ | ||
19 | #include <asm/current.h> | ||
20 | #include <asm/page.h> | ||
21 | |||
22 | static int | ||
23 | pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp) | ||
24 | { | ||
25 | unsigned long addr = (unsigned long)_addr; | ||
26 | pgd_t *pgd; | ||
27 | pmd_t *pmd; | ||
28 | pte_t *pte; | ||
29 | spinlock_t *ptl; | ||
30 | |||
31 | pgd = pgd_offset(current->mm, addr); | ||
32 | if (unlikely(pgd_none(*pgd) || pgd_bad(*pgd))) | ||
33 | return 0; | ||
34 | |||
35 | pmd = pmd_offset(pgd, addr); | ||
36 | if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd))) | ||
37 | return 0; | ||
38 | |||
39 | pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl); | ||
40 | if (unlikely(!pte_present(*pte) || !pte_young(*pte) || | ||
41 | !pte_write(*pte) || !pte_dirty(*pte))) { | ||
42 | pte_unmap_unlock(pte, ptl); | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | *ptep = pte; | ||
47 | *ptlp = ptl; | ||
48 | |||
49 | return 1; | ||
50 | } | ||
51 | |||
52 | static unsigned long noinline | ||
53 | __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) | ||
54 | { | ||
55 | int atomic; | ||
56 | |||
57 | if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { | ||
58 | memcpy((void *)to, from, n); | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | /* the mmap semaphore is taken only if not in an atomic context */ | ||
63 | atomic = in_atomic(); | ||
64 | |||
65 | if (!atomic) | ||
66 | down_read(¤t->mm->mmap_sem); | ||
67 | while (n) { | ||
68 | pte_t *pte; | ||
69 | spinlock_t *ptl; | ||
70 | int tocopy; | ||
71 | |||
72 | while (!pin_page_for_write(to, &pte, &ptl)) { | ||
73 | if (!atomic) | ||
74 | up_read(¤t->mm->mmap_sem); | ||
75 | if (__put_user(0, (char __user *)to)) | ||
76 | goto out; | ||
77 | if (!atomic) | ||
78 | down_read(¤t->mm->mmap_sem); | ||
79 | } | ||
80 | |||
81 | tocopy = (~(unsigned long)to & ~PAGE_MASK) + 1; | ||
82 | if (tocopy > n) | ||
83 | tocopy = n; | ||
84 | |||
85 | memcpy((void *)to, from, tocopy); | ||
86 | to += tocopy; | ||
87 | from += tocopy; | ||
88 | n -= tocopy; | ||
89 | |||
90 | pte_unmap_unlock(pte, ptl); | ||
91 | } | ||
92 | if (!atomic) | ||
93 | up_read(¤t->mm->mmap_sem); | ||
94 | |||
95 | out: | ||
96 | return n; | ||
97 | } | ||
98 | |||
99 | unsigned long | ||
100 | __copy_to_user(void __user *to, const void *from, unsigned long n) | ||
101 | { | ||
102 | /* | ||
103 | * This test is stubbed out of the main function above to keep | ||
104 | * the overhead for small copies low by avoiding a large | ||
105 | * register dump on the stack just to reload them right away. | ||
106 | * With frame pointer disabled, tail call optimization kicks in | ||
107 | * as well making this test almost invisible. | ||
108 | */ | ||
109 | if (n < 64) | ||
110 | return __copy_to_user_std(to, from, n); | ||
111 | return __copy_to_user_memcpy(to, from, n); | ||
112 | } | ||
113 | |||
114 | static unsigned long noinline | ||
115 | __clear_user_memset(void __user *addr, unsigned long n) | ||
116 | { | ||
117 | if (unlikely(segment_eq(get_fs(), KERNEL_DS))) { | ||
118 | memset((void *)addr, 0, n); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | down_read(¤t->mm->mmap_sem); | ||
123 | while (n) { | ||
124 | pte_t *pte; | ||
125 | spinlock_t *ptl; | ||
126 | int tocopy; | ||
127 | |||
128 | while (!pin_page_for_write(addr, &pte, &ptl)) { | ||
129 | up_read(¤t->mm->mmap_sem); | ||
130 | if (__put_user(0, (char __user *)addr)) | ||
131 | goto out; | ||
132 | down_read(¤t->mm->mmap_sem); | ||
133 | } | ||
134 | |||
135 | tocopy = (~(unsigned long)addr & ~PAGE_MASK) + 1; | ||
136 | if (tocopy > n) | ||
137 | tocopy = n; | ||
138 | |||
139 | memset((void *)addr, 0, tocopy); | ||
140 | addr += tocopy; | ||
141 | n -= tocopy; | ||
142 | |||
143 | pte_unmap_unlock(pte, ptl); | ||
144 | } | ||
145 | up_read(¤t->mm->mmap_sem); | ||
146 | |||
147 | out: | ||
148 | return n; | ||
149 | } | ||
150 | |||
151 | unsigned long __clear_user(void __user *addr, unsigned long n) | ||
152 | { | ||
153 | /* See rationale for this in __copy_to_user() above. */ | ||
154 | if (n < 64) | ||
155 | return __clear_user_std(addr, n); | ||
156 | return __clear_user_memset(addr, n); | ||
157 | } | ||
158 | |||
159 | #if 0 | ||
160 | |||
161 | /* | ||
162 | * This code is disabled by default, but kept around in case the chosen | ||
163 | * thresholds need to be revalidated. Some overhead (small but still) | ||
164 | * would be implied by a runtime determined variable threshold, and | ||
165 | * so far the measurement on concerned targets didn't show a worthwhile | ||
166 | * variation. | ||
167 | * | ||
168 | * Note that a fairly precise sched_clock() implementation is needed | ||
169 | * for results to make some sense. | ||
170 | */ | ||
171 | |||
172 | #include <linux/vmalloc.h> | ||
173 | |||
174 | static int __init test_size_treshold(void) | ||
175 | { | ||
176 | struct page *src_page, *dst_page; | ||
177 | void *user_ptr, *kernel_ptr; | ||
178 | unsigned long long t0, t1, t2; | ||
179 | int size, ret; | ||
180 | |||
181 | ret = -ENOMEM; | ||
182 | src_page = alloc_page(GFP_KERNEL); | ||
183 | if (!src_page) | ||
184 | goto no_src; | ||
185 | dst_page = alloc_page(GFP_KERNEL); | ||
186 | if (!dst_page) | ||
187 | goto no_dst; | ||
188 | kernel_ptr = page_address(src_page); | ||
189 | user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010)); | ||
190 | if (!user_ptr) | ||
191 | goto no_vmap; | ||
192 | |||
193 | /* warm up the src page dcache */ | ||
194 | ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE); | ||
195 | |||
196 | for (size = PAGE_SIZE; size >= 4; size /= 2) { | ||
197 | t0 = sched_clock(); | ||
198 | ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size); | ||
199 | t1 = sched_clock(); | ||
200 | ret |= __copy_to_user_std(user_ptr, kernel_ptr, size); | ||
201 | t2 = sched_clock(); | ||
202 | printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); | ||
203 | } | ||
204 | |||
205 | for (size = PAGE_SIZE; size >= 4; size /= 2) { | ||
206 | t0 = sched_clock(); | ||
207 | ret |= __clear_user_memset(user_ptr, size); | ||
208 | t1 = sched_clock(); | ||
209 | ret |= __clear_user_std(user_ptr, size); | ||
210 | t2 = sched_clock(); | ||
211 | printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1); | ||
212 | } | ||
213 | |||
214 | if (ret) | ||
215 | ret = -EFAULT; | ||
216 | |||
217 | vunmap(user_ptr); | ||
218 | no_vmap: | ||
219 | put_page(dst_page); | ||
220 | no_dst: | ||
221 | put_page(src_page); | ||
222 | no_src: | ||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | subsys_initcall(test_size_treshold); | ||
227 | |||
228 | #endif | ||