author	Robin Holt <holt@sgi.com>	2010-04-23 11:36:22 -0400
committer	H. Peter Anvin <hpa@zytor.com>	2010-04-23 18:57:23 -0400
commit	1f9cc3cb6a27521edfe0a21abf97d2bb11c4d237 (patch)
tree	c9af6a71398aed690c1fa813498a0aed8abf2d7b
parent	4daa2a8093ecd1148270a1fc64e99f072b8c2901 (diff)
x86, pat: Update the page flags for memtype atomically instead of using memtype_lock
While testing an application using the xpmem (out of kernel) driver, we
noticed a significant page fault rate reduction on x86_64 with respect
to ia64. For one test running with 32 cpus, one thread per cpu, it took
01:08 for each of the threads to vm_insert_pfn 2GB worth of pages. For
the same test running on 256 cpus, one thread per cpu, it took 14:48 to
vm_insert_pfn 2GB worth of pages.

The slowdown was tracked to lookup_memtype(), which acquires the
spinlock memtype_lock. This heavily contended lock was slowing down
vm_insert_pfn().

With the cmpxchg-on-page->flags method, both the 32 cpu and 256 cpu
cases take approx 00:01.3 seconds to complete.

Signed-off-by: Robin Holt <holt@sgi.com>
LKML-Reference: <20100423153627.751194346@gulag1.americas.sgi.com>
Cc: Venkatesh Pallipadi <venkatesh.pallipadi@gmail.com>
Cc: Rafael Wysocki <rjw@novell.com>
Reviewed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
-rw-r--r--	arch/x86/include/asm/cacheflush.h	44
-rw-r--r--	arch/x86/mm/pat.c	8
2 files changed, 25 insertions, 27 deletions
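For illustration, the lockless update pattern this patch adopts can be sketched
in plain userspace C11, with atomic_compare_exchange_weak() standing in for the
kernel's cmpxchg(). The identifiers below (page_flags, PGMT_MASK, PGMT_WC,
set_memtype_bits) are hypothetical stand-ins for the kernel symbols in the diff
that follows, not the actual implementation:

/*
 * Illustrative userspace sketch of the cmpxchg-on-flags pattern.
 * Not kernel code: atomic_compare_exchange_weak() plays the role of
 * the kernel's cmpxchg(), and page_flags stands in for page->flags.
 */
#include <stdatomic.h>
#include <stdio.h>

#define PGMT_MASK	0x3UL	/* hypothetical: the two memtype bits */
#define PGMT_WC		0x1UL	/* hypothetical encoding for WC */

static _Atomic unsigned long page_flags;

static void set_memtype_bits(unsigned long memtype_flags)
{
	unsigned long old_flags, new_flags;

	do {
		old_flags = atomic_load(&page_flags);
		new_flags = (old_flags & ~PGMT_MASK) | memtype_flags;
		/* Retry on contention (a weak CAS may also fail spuriously). */
	} while (!atomic_compare_exchange_weak(&page_flags,
					       &old_flags, new_flags));
}

int main(void)
{
	atomic_store(&page_flags, 0x8UL);	/* an unrelated flag already set */
	set_memtype_bits(PGMT_WC);
	/* Prints flags = 0x9: the WC bit is set, the unrelated bit survives. */
	printf("flags = %#lx\n", atomic_load(&page_flags));
	return 0;
}

The loop retries only until the two memtype bits are swapped in without losing
concurrent updates to the other bits of the word, so no global lock is needed;
that is what lets reserve_memtype(), free_memtype() and lookup_memtype() drop
memtype_lock entirely in the diff below.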
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index 634c40a739a6..c70068d05f70 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -44,9 +44,6 @@ static inline void copy_from_user_page(struct vm_area_struct *vma,
 	memcpy(dst, src, len);
 }
 
-#define PG_WC PG_arch_1
-PAGEFLAG(WC, WC)
-
 #ifdef CONFIG_X86_PAT
 /*
  * X86 PAT uses page flags WC and Uncached together to keep track of
@@ -55,16 +52,24 @@ PAGEFLAG(WC, WC)
  * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not
  * been changed from its default (value of -1 used to denote this).
  * Note we do not support _PAGE_CACHE_UC here.
- *
- * Caller must hold memtype_lock for atomicity.
  */
+
+#define _PGMT_DEFAULT		0
+#define _PGMT_WC		(1UL << PG_arch_1)
+#define _PGMT_UC_MINUS		(1UL << PG_uncached)
+#define _PGMT_WB		(1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_MASK		(1UL << PG_uncached | 1UL << PG_arch_1)
+#define _PGMT_CLEAR_MASK	(~_PGMT_MASK)
+
 static inline unsigned long get_page_memtype(struct page *pg)
 {
-	if (!PageUncached(pg) && !PageWC(pg))
+	unsigned long pg_flags = pg->flags & _PGMT_MASK;
+
+	if (pg_flags == _PGMT_DEFAULT)
 		return -1;
-	else if (!PageUncached(pg) && PageWC(pg))
+	else if (pg_flags == _PGMT_WC)
 		return _PAGE_CACHE_WC;
-	else if (PageUncached(pg) && !PageWC(pg))
+	else if (pg_flags == _PGMT_UC_MINUS)
 		return _PAGE_CACHE_UC_MINUS;
 	else
 		return _PAGE_CACHE_WB;
@@ -72,25 +77,26 @@ static inline unsigned long get_page_memtype(struct page *pg)
 
 static inline void set_page_memtype(struct page *pg, unsigned long memtype)
 {
+	unsigned long memtype_flags = _PGMT_DEFAULT;
+	unsigned long old_flags;
+	unsigned long new_flags;
+
 	switch (memtype) {
 	case _PAGE_CACHE_WC:
-		ClearPageUncached(pg);
-		SetPageWC(pg);
+		memtype_flags = _PGMT_WC;
 		break;
 	case _PAGE_CACHE_UC_MINUS:
-		SetPageUncached(pg);
-		ClearPageWC(pg);
+		memtype_flags = _PGMT_UC_MINUS;
 		break;
 	case _PAGE_CACHE_WB:
-		SetPageUncached(pg);
-		SetPageWC(pg);
-		break;
-	default:
-	case -1:
-		ClearPageUncached(pg);
-		ClearPageWC(pg);
+		memtype_flags = _PGMT_WB;
 		break;
 	}
+
+	do {
+		old_flags = pg->flags;
+		new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags;
+	} while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags);
 }
 #else
 static inline unsigned long get_page_memtype(struct page *pg) { return -1; }
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 951011166ef5..501fc60e5e4d 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -190,8 +190,6 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
  * Here we do two pass:
  * - Find the memtype of all the pages in the range, look for any conflicts
  * - In case of no conflicts, set the new memtype for pages in the range
- *
- * Caller must hold memtype_lock for atomicity.
  */
 static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
 				  unsigned long *new_type)
@@ -297,9 +295,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	is_range_ram = pat_pagerange_is_ram(start, end);
 	if (is_range_ram == 1) {
 
-		spin_lock(&memtype_lock);
 		err = reserve_ram_pages_type(start, end, req_type, new_type);
-		spin_unlock(&memtype_lock);
 
 		return err;
 	} else if (is_range_ram < 0) {
@@ -351,9 +347,7 @@ int free_memtype(u64 start, u64 end)
 	is_range_ram = pat_pagerange_is_ram(start, end);
 	if (is_range_ram == 1) {
 
-		spin_lock(&memtype_lock);
 		err = free_ram_pages_type(start, end);
-		spin_unlock(&memtype_lock);
 
 		return err;
 	} else if (is_range_ram < 0) {
@@ -394,10 +388,8 @@ static unsigned long lookup_memtype(u64 paddr)
 
 	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
 		struct page *page;
-		spin_lock(&memtype_lock);
 		page = pfn_to_page(paddr >> PAGE_SHIFT);
 		rettype = get_page_memtype(page);
-		spin_unlock(&memtype_lock);
 		/*
 		 * -1 from get_page_memtype() implies RAM page is in its
 		 * default state and not reserved, and hence of type WB