aboutsummaryrefslogtreecommitdiffstats
path: root/arch/arm/lib
diff options
context:
space:
mode:
authorNicolas Pitre <nico@cam.org>2009-05-29 21:55:50 -0400
committerNicolas Pitre <nico@cam.org>2009-05-30 01:10:15 -0400
commitc626e3f5ca1d95ad2204d3128c26e7678714eb55 (patch)
tree7c75da068ff30389e1801a3801bd45d38ccae7a7 /arch/arm/lib
parentcb9dc92c0a1b76165c8c334402e27191084b2047 (diff)
[ARM] alternative copy_to_user: more precise fallback threshold
Previous size thresholds were guessed from various user space benchmarks using a kernel with and without the alternative uaccess option. This is however not as precise as a kernel-based test to measure the real speed of each method. This adds a simple test bench to show the time needed for each method. With this, the optimal size threshold for the alternative implementation can be determined with more confidence. It appears that the optimal threshold for both copy_to_user and clear_user is around 64 bytes. This is not a surprise knowing that the memcpy and memset implementations need at least 64 bytes to achieve maximum throughput. One might suggest that such a test be used to determine the optimal threshold at run time instead, but results are near enough to 64 on tested targets concerned by this alternative copy_to_user implementation, so adding some overhead associated with a variable threshold is probably not worth it for now. Signed-off-by: Nicolas Pitre <nico@marvell.com>
Diffstat (limited to 'arch/arm/lib')
-rw-r--r--arch/arm/lib/uaccess_with_memcpy.c75
1 files changed, 73 insertions, 2 deletions
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 92838e79654d..6b967ffb6552 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -106,7 +106,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
106 * With frame pointer disabled, tail call optimization kicks in 106 * With frame pointer disabled, tail call optimization kicks in
107 * as well making this test almost invisible. 107 * as well making this test almost invisible.
108 */ 108 */
109 if (n < 1024) 109 if (n < 64)
110 return __copy_to_user_std(to, from, n); 110 return __copy_to_user_std(to, from, n);
111 return __copy_to_user_memcpy(to, from, n); 111 return __copy_to_user_memcpy(to, from, n);
112} 112}
@@ -151,7 +151,78 @@ out:
151unsigned long __clear_user(void __user *addr, unsigned long n) 151unsigned long __clear_user(void __user *addr, unsigned long n)
152{ 152{
153 /* See rational for this in __copy_to_user() above. */ 153 /* See rational for this in __copy_to_user() above. */
154 if (n < 256) 154 if (n < 64)
155 return __clear_user_std(addr, n); 155 return __clear_user_std(addr, n);
156 return __clear_user_memset(addr, n); 156 return __clear_user_memset(addr, n);
157} 157}
158
159#if 0
160
161/*
162 * This code is disabled by default, but kept around in case the chosen
163 * thresholds need to be revalidated. Some overhead (small but still)
164 * would be implied by a runtime determined variable threshold, and
165 * so far the measurement on concerned targets didn't show a worthwhile
166 * variation.
167 *
168 * Note that a fairly precise sched_clock() implementation is needed
169 * for results to make some sense.
170 */
171
172#include <linux/vmalloc.h>
173
/*
 * test_size_treshold() - time the memcpy/memset-based user access helpers
 * against the standard ones over a range of sizes.
 *
 * Allocates a source page and a destination page, maps the destination page
 * with vmap() and uses that mapping as the "user" pointer.  For each size
 * from PAGE_SIZE down to 4 (halved on every step) one line is printed with
 * the size, the sched_clock() delta of the memcpy/memset variant and then
 * the sched_clock() delta of the _std variant.
 *
 * Returns 0 when every helper call returned 0, -EFAULT when any of them
 * returned non-zero, and -ENOMEM when a page allocation or vmap() fails.
 */
174static int __init test_size_treshold(void)
175{
176 struct page *src_page, *dst_page;
177 void *user_ptr, *kernel_ptr;
178 unsigned long long t0, t1, t2;
179 int size, ret;
180
 /* Any bail-out before the first copy is an allocation or mapping failure. */
181 ret = -ENOMEM;
182 src_page = alloc_page(GFP_KERNEL);
183 if (!src_page)
184 goto no_src;
185 dst_page = alloc_page(GFP_KERNEL);
186 if (!dst_page)
187 goto no_dst;
188 kernel_ptr = page_address(src_page);
 /*
  * NOTE(review): __P010 presumably selects user-style write protection
  * for this mapping -- confirm against the arch page table definitions.
  */
189 user_ptr = vmap(&dst_page, 1, VM_IOREMAP, __pgprot(__P010));
190 if (!user_ptr)
191 goto no_vmap;
192
193 /* warm up the src page dcache */
 /* This assignment also replaces the initial -ENOMEM in ret. */
194 ret = __copy_to_user_memcpy(user_ptr, kernel_ptr, PAGE_SIZE);
195
 /* Copy test: t1 - t0 is the memcpy-based copy, t2 - t1 the standard one. */
196 for (size = PAGE_SIZE; size >= 4; size /= 2) {
197 t0 = sched_clock();
198 ret |= __copy_to_user_memcpy(user_ptr, kernel_ptr, size);
199 t1 = sched_clock();
200 ret |= __copy_to_user_std(user_ptr, kernel_ptr, size);
201 t2 = sched_clock();
202 printk("copy_to_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
203 }
204
 /* Clear test: t1 - t0 is the memset-based clear, t2 - t1 the standard one. */
205 for (size = PAGE_SIZE; size >= 4; size /= 2) {
206 t0 = sched_clock();
207 ret |= __clear_user_memset(user_ptr, size);
208 t1 = sched_clock();
209 ret |= __clear_user_std(user_ptr, size);
210 t2 = sched_clock();
211 printk("clear_user: %d %llu %llu\n", size, t1 - t0, t2 - t1);
212 }
213
 /*
  * ret now holds the OR of every helper's return value; any non-zero
  * result is reported as -EFAULT.
  */
214 if (ret)
215 ret = -EFAULT;
216
 /* Release resources in reverse order of acquisition. */
217 vunmap(user_ptr);
218no_vmap:
219 put_page(dst_page);
220no_dst:
221 put_page(src_page);
222no_src:
223 return ret;
224}
225
 /* Register the test as a subsys-level initcall. */
226subsys_initcall(test_size_treshold);
227
228#endif