author		Hiro Yoshioka <hyoshiok@miraclelinux.com>	2006-06-23 05:04:16 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-06-23 10:42:56 -0400
commit		c22ce143d15eb288543fe9873e1c5ac1c01b69a1
tree		dc7d457b8952fc50dfc90df659b35de4117c61fc	/include/asm-i386
parent		7dbdf43cfa635ddc3701cc8d1eab07597cd731c0
[PATCH] x86: cache pollution aware __copy_from_user_ll()
Use the x86 cache-bypassing copy instructions for copy_from_user().

Some performance data:

Total of GLOBAL_POWER_EVENTS (CPU cycle samples)

  2.6.12.4.orig    1921587
  2.6.12.4.nt      1599424
  1599424/1921587 = 83.23% (16.77% reduction)

BSQ_CACHE_REFERENCE (L3 cache miss)

  2.6.12.4.orig    57427
  2.6.12.4.nt      20858
  20858/57427 = 36.32% (63.7% reduction)

L3 cache miss reduction of __copy_from_user_ll

  samples   %        app name   symbol name
  37408     65.1412  vmlinux    __copy_from_user_ll
  23        0.1103   vmlinux    __copy_user_zeroing_intel_nocache
  23/37408 = 0.061% (99.94% reduction)

Top 5 of 2.6.12.4.nt

Counted GLOBAL_POWER_EVENTS events (time during which processor is not
stopped) with a unit mask of 0x01 (mandatory), count 100000
  samples   %       app name   symbol name
  128392    8.0274  vmlinux    __copy_user_zeroing_intel_nocache
  64206     4.0143  vmlinux    journal_add_journal_head
  59746     3.7355  vmlinux    do_get_write_access
  47674     2.9807  vmlinux    journal_put_journal_head
  46021     2.8774  vmlinux    journal_dirty_metadata
  pattern9-0-cpu4-0-09011728/summary.out

Counted BSQ_CACHE_REFERENCE events (cache references seen by the bus unit)
with a unit mask of 0x3f (multiple flags), count 3000
  samples   %       app name   symbol name
  69755     4.2861  vmlinux    __copy_user_zeroing_intel_nocache
  55685     3.4215  vmlinux    journal_add_journal_head
  52371     3.2179  vmlinux    __find_get_block
  45504     2.7960  vmlinux    journal_put_journal_head
  36005     2.2123  vmlinux    journal_stop
  pattern9-0-cpu4-0-09011744/summary.out

Counted BSQ_CACHE_REFERENCE events (cache references seen by the bus unit)
with a unit mask of 0x200 (read 3rd level cache miss), count 3000
  samples   %       app name   symbol name
  1147      5.4994  vmlinux    journal_add_journal_head
  881       4.2240  vmlinux    journal_dirty_data
  872       4.1809  vmlinux    blk_rq_map_sg
  734       3.5192  vmlinux    journal_commit_transaction
  617       2.9582  vmlinux    radix_tree_delete
  pattern9-0-cpu4-0-09011731/summary.out

iozone results:

  original 2.6.12.4    CPU time = 207.768 sec
  cache aware          CPU time = 184.783 sec
  (total of three runs)
  184.783/207.768 = 88.94% (11.06% reduction)

original:
  pattern9-0-cpu4-0-08191720/iozone.out:  CPU Utilization: Wall time 45.997  CPU time 64.527  CPU utilization 140.28 %
  pattern9-0-cpu4-0-08191741/iozone.out:  CPU Utilization: Wall time 46.878  CPU time 71.933  CPU utilization 153.45 %
  pattern9-0-cpu4-0-08191743/iozone.out:  CPU Utilization: Wall time 45.152  CPU time 71.308  CPU utilization 157.93 %

cache aware:
  pattern9-0-cpu4-0-09011728/iozone.out:  CPU Utilization: Wall time 44.842  CPU time 62.465  CPU utilization 139.30 %
  pattern9-0-cpu4-0-09011731/iozone.out:  CPU Utilization: Wall time 44.718  CPU time 59.273  CPU utilization 132.55 %
  pattern9-0-cpu4-0-09011744/iozone.out:  CPU Utilization: Wall time 44.367  CPU time 63.045  CPU utilization 142.10 %

Signed-off-by: Hiro Yoshioka <hyoshiok@miraclelinux.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
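For context, here is a minimal sketch of what a cache-bypassing copy loop
looks like. The function name and simplifications below are illustrative and
not part of the patch; the real routine, __copy_user_zeroing_intel_nocache(),
lives in the arch/i386/lib part of this change (outside the include/asm-i386
view shown here) and additionally handles faults via the exception table,
unaligned tails, and loop unrolling.

/*
 * Illustrative sketch only -- NOT the routine added by this patch.
 * The core idea: store into the destination with the SSE2 non-temporal
 * store instruction (movnti), so a large one-shot copy does not evict
 * useful data from the CPU caches.
 */
static void nocache_copy_sketch(void *dst, const void *src, unsigned long n)
{
	unsigned long i;

	/* assumes SSE2 is available and n is a multiple of 4 */
	for (i = 0; i < n; i += 4) {
		unsigned int tmp = *(const unsigned int *)(src + i);

		asm volatile("movnti %1, %0"
			     : "=m" (*(unsigned int *)(dst + i))
			     : "r" (tmp));
	}
	asm volatile("sfence" ::: "memory");	/* order the NT stores */
}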
Diffstat (limited to 'include/asm-i386')
-rw-r--r--	include/asm-i386/uaccess.h	33
1 file changed, 33 insertions, 0 deletions
diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h
index 1ec65523ea5e..82af28a943ab 100644
--- a/include/asm-i386/uaccess.h
+++ b/include/asm-i386/uaccess.h
@@ -390,6 +390,8 @@ unsigned long __must_check __copy_to_user_ll(void __user *to,
 				const void *from, unsigned long n);
 unsigned long __must_check __copy_from_user_ll(void *to,
 				const void __user *from, unsigned long n);
+unsigned long __must_check __copy_from_user_ll_nocache(void *to,
+				const void __user *from, unsigned long n);
 
 /*
  * Here we special-case 1, 2 and 4-byte copy_*_user invocations.  On a fault
@@ -478,12 +480,43 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 	return __copy_from_user_ll(to, from, n);
 }
 
+#define ARCH_HAS_NOCACHE_UACCESS
+
+static __always_inline unsigned long __copy_from_user_inatomic_nocache(void *to,
+				const void __user *from, unsigned long n)
+{
+	if (__builtin_constant_p(n)) {
+		unsigned long ret;
+
+		switch (n) {
+		case 1:
+			__get_user_size(*(u8 *)to, from, 1, ret, 1);
+			return ret;
+		case 2:
+			__get_user_size(*(u16 *)to, from, 2, ret, 2);
+			return ret;
+		case 4:
+			__get_user_size(*(u32 *)to, from, 4, ret, 4);
+			return ret;
+		}
+	}
+	return __copy_from_user_ll_nocache(to, from, n);
+}
+
 static __always_inline unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_sleep();
 	return __copy_from_user_inatomic(to, from, n);
 }
+
+static __always_inline unsigned long
+__copy_from_user_nocache(void *to, const void __user *from, unsigned long n)
+{
+	might_sleep();
+	return __copy_from_user_inatomic_nocache(to, from, n);
+}
+
 unsigned long __must_check copy_to_user(void __user *to,
 				const void *from, unsigned long n);
 unsigned long __must_check copy_from_user(void *to,
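
For illustration, a hypothetical caller (not part of this patch) could use the
new sleeping variant on a bulk copy path whose payload will not be read again
soon, so the copy does not displace hot cache lines. In atomic context,
__copy_from_user_inatomic_nocache() would be used instead, since
__copy_from_user_nocache() calls might_sleep().

/*
 * Hypothetical example, for illustration only -- not part of this patch.
 * Copies a bulk payload from user space without polluting the caches.
 */
static long fill_buffer_nocache(void *kbuf, const char __user *ubuf,
				unsigned long len)
{
	if (!access_ok(VERIFY_READ, ubuf, len))
		return -EFAULT;

	/* returns the number of bytes that could not be copied */
	return __copy_from_user_nocache(kbuf, ubuf, len) ? -EFAULT : 0;
}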