about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Andi Kleen <ak@suse.de> 2006-09-26 04:52:38 -0400
committer Andi Kleen <andi@basil.nowhere.org> 2006-09-26 04:52:38 -0400
commit 53ee11ae0d73f28029a5f0d991bc4dcd7c817e7a (patch)
tree c0a025cc592d516674bb1d8a0046bf281b6d1724
parent 80d2679cbc8e170011c9649fb8fb684ffd7e5c8f (diff)
[PATCH] Optimize PDA accesses slightly
Based on an idea by Jeremy Fitzhardinge:

Replace the volatiles and memory clobbers in the PDA access with telling gcc about access to a proxy PDA structure that doesn't actually exist. But the dummy accesses give a defined ordering for read/write accesses.

Also add some memory barriers to the early GS initialization to make sure no PDA access is moved before it.

Advantage is some .text savings (probably most from better code for accessing "current"):

   text    data     bss     dec     hex filename
4845647 1223688  615864 6685199  66020f vmlinux
4837780 1223688  615864 6677332  65e354 vmlinux-pda

1.2% smaller code

Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r-- arch/x86_64/kernel/setup64.c 3
-rw-r--r-- include/asm-x86_64/pda.h 41
2 files changed, 25 insertions, 19 deletions
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index e85cfbb49b63..491361752c70 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -121,7 +121,10 @@ void pda_init(int cpu)
121 121
122 /* Setup up data that may be needed in __get_free_pages early */ 122 /* Setup up data that may be needed in __get_free_pages early */
123 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); 123 asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
124 /* Memory clobbers used to order PDA accesses */
125 mb();
124 wrmsrl(MSR_GS_BASE, pda); 126 wrmsrl(MSR_GS_BASE, pda);
127 mb();
125 128
126 pda->cpunumber = cpu; 129 pda->cpunumber = cpu;
127 pda->irqcount = -1; 130 pda->irqcount = -1;
diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h
index b47c3df9ed1d..55e21da96e7a 100644
--- a/include/asm-x86_64/pda.h
+++ b/include/asm-x86_64/pda.h
@@ -36,40 +36,43 @@ extern struct x8664_pda boot_cpu_pda[];
36 * There is no fast way to get the base address of the PDA, all the accesses 36 * There is no fast way to get the base address of the PDA, all the accesses
37 * have to mention %fs/%gs. So it needs to be done this Torvaldian way. 37 * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
38 */ 38 */
39#define sizeof_field(type,field) (sizeof(((type *)0)->field))
40#define typeof_field(type,field) typeof(((type *)0)->field)
41
42extern void __bad_pda_field(void); 39extern void __bad_pda_field(void);
43 40
41/* proxy_pda doesn't actually exist, but tell gcc it is accessed
42 for all PDA accesses so it gets read/write dependencies right. */
43extern struct x8664_pda _proxy_pda;
44
44#define pda_offset(field) offsetof(struct x8664_pda, field) 45#define pda_offset(field) offsetof(struct x8664_pda, field)
45 46
46#define pda_to_op(op,field,val) do { \ 47#define pda_to_op(op,field,val) do { \
47 typedef typeof_field(struct x8664_pda, field) T__; \ 48 typedef typeof(_proxy_pda.field) T__; \
48 switch (sizeof_field(struct x8664_pda, field)) { \ 49 switch (sizeof(_proxy_pda.field)) { \
49case 2: \ 50case 2: \
50asm volatile(op "w %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ 51asm(op "w %1,%%gs:%P2" : "+m" (_proxy_pda.field) : \
52 "ri" ((T__)val),"i"(pda_offset(field))); break; \
51case 4: \ 53case 4: \
52asm volatile(op "l %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ 54asm(op "l %1,%%gs:%P2" : "+m" (_proxy_pda.field) : \
55 "ri" ((T__)val),"i"(pda_offset(field))); break; \
53case 8: \ 56case 8: \
54asm volatile(op "q %0,%%gs:%P1"::"ri" ((T__)val),"i"(pda_offset(field)):"memory"); break; \ 57asm(op "q %1,%%gs:%P2": "+m" (_proxy_pda.field) : \
55 default: __bad_pda_field(); \ 58 "ri" ((T__)val),"i"(pda_offset(field))); break; \
59default: __bad_pda_field(); \
56 } \ 60 } \
57 } while (0) 61 } while (0)
58 62
59/*
60 * AK: PDA read accesses should be neither volatile nor have an memory clobber.
61 * Unfortunately removing them causes all hell to break lose currently.
62 */
63#define pda_from_op(op,field) ({ \ 63#define pda_from_op(op,field) ({ \
64 typeof_field(struct x8664_pda, field) ret__; \ 64 typeof(_proxy_pda.field) ret__; \
65 switch (sizeof_field(struct x8664_pda, field)) { \ 65 switch (sizeof(_proxy_pda.field)) { \
66case 2: \ 66case 2: \
67asm volatile(op "w %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ 67asm(op "w %%gs:%P1,%0":"=r" (ret__):\
68 "i" (pda_offset(field)), "m" (_proxy_pda.field)); break;\
68case 4: \ 69case 4: \
69asm volatile(op "l %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ 70asm(op "l %%gs:%P1,%0":"=r" (ret__):\
71 "i" (pda_offset(field)), "m" (_proxy_pda.field)); break;\
70case 8: \ 72case 8: \
71asm volatile(op "q %%gs:%P1,%0":"=r" (ret__):"i"(pda_offset(field)):"memory"); break;\ 73asm(op "q %%gs:%P1,%0":"=r" (ret__):\
72 default: __bad_pda_field(); \ 74 "i" (pda_offset(field)), "m" (_proxy_pda.field)); break;\
75default: __bad_pda_field(); \
73 } \ 76 } \
74 ret__; }) 77 ret__; })
75 78