diff options
author | Hiro Yoshioka <hyoshiok@miraclelinux.com> | 2006-06-23 05:04:16 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-23 10:42:56 -0400 |
commit | c22ce143d15eb288543fe9873e1c5ac1c01b69a1 (patch) | |
tree | dc7d457b8952fc50dfc90df659b35de4117c61fc /mm | |
parent | 7dbdf43cfa635ddc3701cc8d1eab07597cd731c0 (diff) |
[PATCH] x86: cache pollution aware __copy_from_user_ll()
Use the x86 cache-bypassing copy instructions for copy_from_user().
Some performance data follow.
Total of GLOBAL_POWER_EVENTS (CPU cycle samples)
2.6.12.4.orig 1921587
2.6.12.4.nt 1599424
1599424/1921587=83.23% (16.77% reduction)
BSQ_CACHE_REFERENCE (L3 cache miss)
2.6.12.4.orig 57427
2.6.12.4.nt 20858
20858/57427=36.32% (63.7% reduction)
L3 cache miss reduction of __copy_from_user_ll
samples %
37408 65.1412 vmlinux __copy_from_user_ll
23 0.1103 vmlinux __copy_user_zeroing_intel_nocache
23/37408=0.061% (99.94% reduction)
Top 5 of 2.6.12.4.nt
Counted GLOBAL_POWER_EVENTS events (time during which processor is not stopped) with a unit mask of 0x01 (mandatory) count 100000
samples % app name symbol name
128392 8.0274 vmlinux __copy_user_zeroing_intel_nocache
64206 4.0143 vmlinux journal_add_journal_head
59746 3.7355 vmlinux do_get_write_access
47674 2.9807 vmlinux journal_put_journal_head
46021 2.8774 vmlinux journal_dirty_metadata
pattern9-0-cpu4-0-09011728/summary.out
Counted BSQ_CACHE_REFERENCE events (cache references seen by the bus unit) with a unit mask of 0x3f (multiple flags) count 3000
samples % app name symbol name
69755 4.2861 vmlinux __copy_user_zeroing_intel_nocache
55685 3.4215 vmlinux journal_add_journal_head
52371 3.2179 vmlinux __find_get_block
45504 2.7960 vmlinux journal_put_journal_head
36005 2.2123 vmlinux journal_stop
pattern9-0-cpu4-0-09011744/summary.out
Counted BSQ_CACHE_REFERENCE events (cache references seen by the bus unit) with a unit mask of 0x200 (read 3rd level cache miss) count 3000
samples % app name symbol name
1147 5.4994 vmlinux journal_add_journal_head
881 4.2240 vmlinux journal_dirty_data
872 4.1809 vmlinux blk_rq_map_sg
734 3.5192 vmlinux journal_commit_transaction
617 2.9582 vmlinux radix_tree_delete
pattern9-0-cpu4-0-09011731/summary.out
iozone results:
original 2.6.12.4 CPU time = 207.768 sec
cache aware CPU time = 184.783 sec
(CPU time summed over three runs)
184.783/207.768=88.94% (11.06% reduction)
original:
pattern9-0-cpu4-0-08191720/iozone.out: CPU Utilization: Wall time 45.997 CPU time 64.527 CPU utilization 140.28 %
pattern9-0-cpu4-0-08191741/iozone.out: CPU Utilization: Wall time 46.878 CPU time 71.933 CPU utilization 153.45 %
pattern9-0-cpu4-0-08191743/iozone.out: CPU Utilization: Wall time 45.152 CPU time 71.308 CPU utilization 157.93 %
cache aware:
pattern9-0-cpu4-0-09011728/iozone.out: CPU Utilization: Wall time 44.842 CPU time 62.465 CPU utilization 139.30 %
pattern9-0-cpu4-0-09011731/iozone.out: CPU Utilization: Wall time 44.718 CPU time 59.273 CPU utilization 132.55 %
pattern9-0-cpu4-0-09011744/iozone.out: CPU Utilization: Wall time 44.367 CPU time 63.045 CPU utilization 142.10 %
Signed-off-by: Hiro Yoshioka <hyoshiok@miraclelinux.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 4 | ||||
-rw-r--r-- | mm/filemap.h | 6 |
2 files changed, 5 insertions, 5 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 368678c2d53..807a463fd5e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/compiler.h> | 15 | #include <linux/compiler.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/uaccess.h> | ||
17 | #include <linux/aio.h> | 18 | #include <linux/aio.h> |
18 | #include <linux/capability.h> | 19 | #include <linux/capability.h> |
19 | #include <linux/kernel_stat.h> | 20 | #include <linux/kernel_stat.h> |
@@ -38,7 +39,6 @@ | |||
38 | */ | 39 | */ |
39 | #include <linux/buffer_head.h> /* for generic_osync_inode */ | 40 | #include <linux/buffer_head.h> /* for generic_osync_inode */ |
40 | 41 | ||
41 | #include <asm/uaccess.h> | ||
42 | #include <asm/mman.h> | 42 | #include <asm/mman.h> |
43 | 43 | ||
44 | static ssize_t | 44 | static ssize_t |
@@ -1902,7 +1902,7 @@ __filemap_copy_from_user_iovec(char *vaddr, | |||
1902 | int copy = min(bytes, iov->iov_len - base); | 1902 | int copy = min(bytes, iov->iov_len - base); |
1903 | 1903 | ||
1904 | base = 0; | 1904 | base = 0; |
1905 | left = __copy_from_user_inatomic(vaddr, buf, copy); | 1905 | left = __copy_from_user_inatomic_nocache(vaddr, buf, copy); |
1906 | copied += copy; | 1906 | copied += copy; |
1907 | bytes -= copy; | 1907 | bytes -= copy; |
1908 | vaddr += copy; | 1908 | vaddr += copy; |
diff --git a/mm/filemap.h b/mm/filemap.h index 13793ba0ce1..5683cde2205 100644 --- a/mm/filemap.h +++ b/mm/filemap.h | |||
@@ -13,7 +13,7 @@ | |||
13 | #include <linux/highmem.h> | 13 | #include <linux/highmem.h> |
14 | #include <linux/uio.h> | 14 | #include <linux/uio.h> |
15 | #include <linux/config.h> | 15 | #include <linux/config.h> |
16 | #include <asm/uaccess.h> | 16 | #include <linux/uaccess.h> |
17 | 17 | ||
18 | size_t | 18 | size_t |
19 | __filemap_copy_from_user_iovec(char *vaddr, | 19 | __filemap_copy_from_user_iovec(char *vaddr, |
@@ -34,13 +34,13 @@ filemap_copy_from_user(struct page *page, unsigned long offset, | |||
34 | int left; | 34 | int left; |
35 | 35 | ||
36 | kaddr = kmap_atomic(page, KM_USER0); | 36 | kaddr = kmap_atomic(page, KM_USER0); |
37 | left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); | 37 | left = __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes); |
38 | kunmap_atomic(kaddr, KM_USER0); | 38 | kunmap_atomic(kaddr, KM_USER0); |
39 | 39 | ||
40 | if (left != 0) { | 40 | if (left != 0) { |
41 | /* Do it the slow way */ | 41 | /* Do it the slow way */ |
42 | kaddr = kmap(page); | 42 | kaddr = kmap(page); |
43 | left = __copy_from_user(kaddr + offset, buf, bytes); | 43 | left = __copy_from_user_nocache(kaddr + offset, buf, bytes); |
44 | kunmap(page); | 44 | kunmap(page); |
45 | } | 45 | } |
46 | return bytes - left; | 46 | return bytes - left; |