author		Robin Holt <holt@sgi.com>	2010-04-23 11:36:22 -0400
committer	H. Peter Anvin <hpa@zytor.com>	2010-04-23 18:57:23 -0400
commit		1f9cc3cb6a27521edfe0a21abf97d2bb11c4d237 (patch)
tree		c9af6a71398aed690c1fa813498a0aed8abf2d7b
parent		4daa2a8093ecd1148270a1fc64e99f072b8c2901 (diff)
x86, pat: Update the page flags for memtype atomically instead of using memtype_lock
While testing an application that uses the xpmem (out-of-kernel) driver, we
noticed a significantly lower page fault rate on x86_64 than on ia64. For
one test running with 32 cpus, one thread per cpu, each thread took 01:08
(mm:ss) to vm_insert_pfn() 2 GB worth of pages. For the same test running
on 256 cpus, one thread per cpu, it took 14:48 to vm_insert_pfn() the same
2 GB worth of pages.
The slowdown was tracked down to lookup_memtype(), which acquires the
memtype_lock spinlock. This heavily contended lock was slowing down
vm_insert_pfn().
With the cmpxchg-on-page->flags method introduced by this patch, both the
32-cpu and the 256-cpu cases complete in approximately 1.3 seconds (a
stand-alone sketch of the pattern follows the tags below).
Signed-off-by: Robin Holt <holt@sgi.com>
LKML-Reference: <20100423153627.751194346@gulag1.americas.sgi.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@gmail.com>
Cc: Rafael Wysocki <rjw@novell.com>
Reviewed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
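
For context, the core of the fix is a classic compare-and-swap retry loop.
Below is a minimal, self-contained user-space analogue, using C11 atomics in
place of the kernel's cmpxchg(); the PGMT_* names and bit values here are
illustrative, not the kernel's.

/*
 * User-space analogue of the patch's lock-free page->flags update.
 * C11 atomic_compare_exchange_weak() stands in for the kernel's
 * cmpxchg(); the PGMT_* name and bit values are illustrative only.
 */
#include <stdatomic.h>
#include <stdio.h>

#define PGMT_MASK	0x3UL	/* the two bits that encode the memtype */

static _Atomic unsigned long flags = 0xf000UL;	/* unrelated bits set */

static void set_memtype_bits(unsigned long memtype_bits)
{
	unsigned long old = atomic_load(&flags);
	unsigned long new;

	/*
	 * Compare-and-swap retry loop: recompute the new value and retry
	 * if another thread changed flags since we read it.  On failure,
	 * compare_exchange_weak reloads 'old' for us.
	 */
	do {
		new = (old & ~PGMT_MASK) | memtype_bits;
	} while (!atomic_compare_exchange_weak(&flags, &old, new));
}

int main(void)
{
	set_memtype_bits(0x1UL);
	printf("flags = %#lx\n", atomic_load(&flags));	/* prints 0xf001 */
	return 0;
}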
-rw-r--r--	arch/x86/include/asm/cacheflush.h	44
-rw-r--r--	arch/x86/mm/pat.c	8
2 files changed, 25 insertions(+), 27 deletions(-)
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 634c40a739a6..c70068d05f70 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -44,9 +44,6 @@ static inline void copy_from_user_page(struct vm_area_struct *vma,
 	memcpy(dst, src, len);
 }
 
-#define PG_WC PG_arch_1
-PAGEFLAG(WC, WC)
-
 #ifdef CONFIG_X86_PAT
 /*
  * X86 PAT uses page flags WC and Uncached together to keep track of
@@ -55,16 +52,24 @@ PAGEFLAG(WC, WC)
  * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not
  * been changed from its default (value of -1 used to denote this).
  * Note we do not support _PAGE_CACHE_UC here.
- *
- * Caller must hold memtype_lock for atomicity.
  */
+
+#define _PGMT_DEFAULT		0
+#define _PGMT_WC		(1UL << PG_arch_1)
+#define _PGMT_UC_MINUS		(1UL << PG_uncached)
+#define _PGMT_WB		(1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_MASK		(1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_CLEAR_MASK	(~_PGMT_MASK)
+
 static inline unsigned long get_page_memtype(struct page *pg)
 {
-	if (!PageUncached(pg) && !PageWC(pg))
+	unsigned long pg_flags = pg->flags & _PGMT_MASK;
+
+	if (pg_flags == _PGMT_DEFAULT)
 		return -1;
-	else if (!PageUncached(pg) && PageWC(pg))
+	else if (pg_flags == _PGMT_WC)
 		return _PAGE_CACHE_WC;
-	else if (PageUncached(pg) && !PageWC(pg))
+	else if (pg_flags == _PGMT_UC_MINUS)
 		return _PAGE_CACHE_UC_MINUS;
 	else
 		return _PAGE_CACHE_WB;
@@ -72,25 +77,26 @@ static inline unsigned long get_page_memtype(struct page *pg)
 
 static inline void set_page_memtype(struct page *pg, unsigned long memtype)
 {
+	unsigned long memtype_flags = _PGMT_DEFAULT;
+	unsigned long old_flags;
+	unsigned long new_flags;
+
 	switch (memtype) {
 	case _PAGE_CACHE_WC:
-		ClearPageUncached(pg);
-		SetPageWC(pg);
+		memtype_flags = _PGMT_WC;
 		break;
 	case _PAGE_CACHE_UC_MINUS:
-		SetPageUncached(pg);
-		ClearPageWC(pg);
+		memtype_flags = _PGMT_UC_MINUS;
 		break;
 	case _PAGE_CACHE_WB:
-		SetPageUncached(pg);
-		SetPageWC(pg);
-		break;
-	default:
-	case -1:
-		ClearPageUncached(pg);
-		ClearPageWC(pg);
+		memtype_flags = _PGMT_WB;
 		break;
 	}
+
+	do {
+		old_flags = pg->flags;
+		new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
+	} while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
 }
 #else
 static inline unsigned long get_page_memtype(struct page *pg) { return -1; }
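
The key to making a single cmpxchg sufficient is visible in the hunk above:
the four memtype states are packed into the two existing flag bits
PG_uncached and PG_arch_1, so one word-sized compare-and-swap updates both
at once. Here is a stand-alone check of that round-trip property; the bit
positions and DEMO_* names are made up for the demo, not the kernel's layout.

/*
 * Stand-alone check of the two-bit state encoding: four states fit in
 * two flag bits, and updating them never disturbs the other bits of
 * the word.  Bit positions are illustrative only.
 */
#include <assert.h>

#define DEMO_PG_ARCH_1		9
#define DEMO_PG_UNCACHED	10

#define DEMO_PGMT_DEFAULT	0UL
#define DEMO_PGMT_WC		(1UL << DEMO_PG_ARCH_1)
#define DEMO_PGMT_UC_MINUS	(1UL << DEMO_PG_UNCACHED)
#define DEMO_PGMT_WB		(DEMO_PGMT_UC_MINUS | DEMO_PGMT_WC)
#define DEMO_PGMT_MASK		(DEMO_PGMT_UC_MINUS | DEMO_PGMT_WC)

int main(void)
{
	unsigned long flags = 0xf000UL;	/* unrelated page-flag bits */

	/* Set WB, then read the state back out of the masked bits. */
	flags = (flags & ~DEMO_PGMT_MASK) | DEMO_PGMT_WB;
	assert((flags & DEMO_PGMT_MASK) == DEMO_PGMT_WB);

	/* Switching states leaves the unrelated bits untouched. */
	flags = (flags & ~DEMO_PGMT_MASK) | DEMO_PGMT_UC_MINUS;
	assert((flags & DEMO_PGMT_MASK) == DEMO_PGMT_UC_MINUS);
	assert((flags & ~DEMO_PGMT_MASK) == 0xf000UL);

	return 0;
}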
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 951011166ef5..501fc60e5e4d 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -190,8 +190,6 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
  * Here we do two pass:
  * - Find the memtype of all the pages in the range, look for any conflicts
  * - In case of no conflicts, set the new memtype for pages in the range
- *
- * Caller must hold memtype_lock for atomicity.
  */
 static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
 				  unsigned long *new_type)
@@ -297,9 +295,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	is_range_ram = pat_pagerange_is_ram(start, end);
 	if (is_range_ram == 1) {
 
-		spin_lock(&memtype_lock);
 		err = reserve_ram_pages_type(start, end, req_type, new_type);
-		spin_unlock(&memtype_lock);
 
 		return err;
 	} else if (is_range_ram < 0) {
@@ -351,9 +347,7 @@ int free_memtype(u64 start, u64 end)
 	is_range_ram = pat_pagerange_is_ram(start, end);
 	if (is_range_ram == 1) {
 
-		spin_lock(&memtype_lock);
 		err = free_ram_pages_type(start, end);
-		spin_unlock(&memtype_lock);
 
 		return err;
 	} else if (is_range_ram < 0) {
@@ -394,10 +388,8 @@ static unsigned long lookup_memtype(u64 paddr)
 
 	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
 		struct page *page;
-		spin_lock(&memtype_lock);
 		page = pfn_to_page(paddr >> PAGE_SHIFT);
 		rettype = get_page_memtype(page);
-		spin_unlock(&memtype_lock);
 		/*
 		 * -1 from get_page_memtype() implies RAM page is in its
 		 * default state and not reserved, and hence of type WB