diff options
author | Robin Holt <holt@sgi.com> | 2010-04-23 11:36:22 -0400 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2010-04-23 18:57:23 -0400 |
commit | 1f9cc3cb6a27521edfe0a21abf97d2bb11c4d237 (patch) | |
tree | c9af6a71398aed690c1fa813498a0aed8abf2d7b /arch/x86/include/asm/cacheflush.h | |
parent | 4daa2a8093ecd1148270a1fc64e99f072b8c2901 (diff) |
x86, pat: Update the page flags for memtype atomically instead of using memtype_lock
While testing an application using the xpmem (out of kernel) driver, we
noticed a significantly lower page fault rate on x86_64 than on ia64.
For one test running with 32 cpus, one thread per cpu, it
took 01:08 for each of the threads to vm_insert_pfn 2 GB worth of pages.
For the same test running on 256 cpus, one thread per cpu, it took 14:48
to vm_insert_pfn 2 GB worth of pages.
The slowdown was tracked to lookup_memtype(), which acquires the
spinlock memtype_lock. This heavily contended lock was slowing down
vm_insert_pfn().
With the memtype bits updated by cmpxchg on page->flags instead, both the
32 cpu and 256 cpu cases take approximately 1.3 seconds (00:01.3) to complete.
Signed-off-by: Robin Holt <holt@sgi.com>
LKML-Reference: <20100423153627.751194346@gulag1.americas.sgi.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@gmail.com>
Cc: Rafael Wysocki <rjw@novell.com>
Reviewed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'arch/x86/include/asm/cacheflush.h')
-rw-r--r-- | arch/x86/include/asm/cacheflush.h | 44 |
1 files changed, 25 insertions, 19 deletions
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 634c40a739a6..c70068d05f70 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
@@ -44,9 +44,6 @@ static inline void copy_from_user_page(struct vm_area_struct *vma, | |||
44 | memcpy(dst, src, len); | 44 | memcpy(dst, src, len); |
45 | } | 45 | } |
46 | 46 | ||
47 | #define PG_WC PG_arch_1 | ||
48 | PAGEFLAG(WC, WC) | ||
49 | |||
50 | #ifdef CONFIG_X86_PAT | 47 | #ifdef CONFIG_X86_PAT |
51 | /* | 48 | /* |
52 | * X86 PAT uses page flags WC and Uncached together to keep track of | 49 | * X86 PAT uses page flags WC and Uncached together to keep track of |
@@ -55,16 +52,24 @@ PAGEFLAG(WC, WC) | |||
55 | * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not | 52 | * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not |
56 | * been changed from its default (value of -1 used to denote this). | 53 | * been changed from its default (value of -1 used to denote this). |
57 | * Note we do not support _PAGE_CACHE_UC here. | 54 | * Note we do not support _PAGE_CACHE_UC here. |
58 | * | ||
59 | * Caller must hold memtype_lock for atomicity. | ||
60 | */ | 55 | */ |
56 | |||
57 | #define _PGMT_DEFAULT 0 | ||
58 | #define _PGMT_WC (1UL << PG_arch_1) | ||
59 | #define _PGMT_UC_MINUS (1UL << PG_uncached) | ||
60 | #define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) | ||
61 | #define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) | ||
62 | #define _PGMT_CLEAR_MASK (~_PGMT_MASK) | ||
63 | |||
61 | static inline unsigned long get_page_memtype(struct page *pg) | 64 | static inline unsigned long get_page_memtype(struct page *pg) |
62 | { | 65 | { |
63 | if (!PageUncached(pg) && !PageWC(pg)) | 66 | unsigned long pg_flags = pg->flags & _PGMT_MASK; |
67 | |||
68 | if (pg_flags == _PGMT_DEFAULT) | ||
64 | return -1; | 69 | return -1; |
65 | else if (!PageUncached(pg) && PageWC(pg)) | 70 | else if (pg_flags == _PGMT_WC) |
66 | return _PAGE_CACHE_WC; | 71 | return _PAGE_CACHE_WC; |
67 | else if (PageUncached(pg) && !PageWC(pg)) | 72 | else if (pg_flags == _PGMT_UC_MINUS) |
68 | return _PAGE_CACHE_UC_MINUS; | 73 | return _PAGE_CACHE_UC_MINUS; |
69 | else | 74 | else |
70 | return _PAGE_CACHE_WB; | 75 | return _PAGE_CACHE_WB; |
@@ -72,25 +77,26 @@ static inline unsigned long get_page_memtype(struct page *pg) | |||
72 | 77 | ||
73 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) | 78 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) |
74 | { | 79 | { |
80 | unsigned long memtype_flags = _PGMT_DEFAULT; | ||
81 | unsigned long old_flags; | ||
82 | unsigned long new_flags; | ||
83 | |||
75 | switch (memtype) { | 84 | switch (memtype) { |
76 | case _PAGE_CACHE_WC: | 85 | case _PAGE_CACHE_WC: |
77 | ClearPageUncached(pg); | 86 | memtype_flags = _PGMT_WC; |
78 | SetPageWC(pg); | ||
79 | break; | 87 | break; |
80 | case _PAGE_CACHE_UC_MINUS: | 88 | case _PAGE_CACHE_UC_MINUS: |
81 | SetPageUncached(pg); | 89 | memtype_flags = _PGMT_UC_MINUS; |
82 | ClearPageWC(pg); | ||
83 | break; | 90 | break; |
84 | case _PAGE_CACHE_WB: | 91 | case _PAGE_CACHE_WB: |
85 | SetPageUncached(pg); | 92 | memtype_flags = _PGMT_WB; |
86 | SetPageWC(pg); | ||
87 | break; | ||
88 | default: | ||
89 | case -1: | ||
90 | ClearPageUncached(pg); | ||
91 | ClearPageWC(pg); | ||
92 | break; | 93 | break; |
93 | } | 94 | } |
95 | |||
96 | do { | ||
97 | old_flags = pg->flags; | ||
98 | new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; | ||
99 | } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); | ||
94 | } | 100 | } |
95 | #else | 101 | #else |
96 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } | 102 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } |