diff options
author | Fenghua Yu <fenghua.yu@intel.com> | 2008-04-04 14:05:59 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2008-04-04 14:05:59 -0400 |
commit | 2046b94e7c4fce92eb8165c2c36c6478f4927178 (patch) | |
tree | 0dbbdf17d64b521f2debcc8677368ceec8805d8c /arch/ia64/mm | |
parent | e315c121a858499d84dc88c499046b9f10bb61ec (diff) |
[IA64] Multiple outstanding ptc.g instruction support
According to SDM2.2, Itanium supports multiple outstanding ptc.g instructions.
But the current kernel function ia64_global_tlb_purge() uses a spinlock to serialize
ptc.g instructions issued by multiple processors. This serialization might cause
scalability issues on a big SMP machine where many processors could purge the TLB
in parallel.
The patch fixes this problem by issuing multiple ptc.g instructions in
ia64_global_tlb_purge(). It also adds support for the "PALO" table to get
a platform view of the max number of outstanding ptc.g instructions (which
may be different from the processor view found from PAL_VM_SUMMARY).
PALO specification can be found at: http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
Spinaphore implementation by Matthew Wilcox.
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/mm')
-rw-r--r-- | arch/ia64/mm/tlb.c | 125 |
1 files changed, 110 insertions, 15 deletions
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 655da240d13c..d41d6076ed03 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c | |||
@@ -11,6 +11,9 @@ | |||
11 | * Rohit Seth <rohit.seth@intel.com> | 11 | * Rohit Seth <rohit.seth@intel.com> |
12 | * Ken Chen <kenneth.w.chen@intel.com> | 12 | * Ken Chen <kenneth.w.chen@intel.com> |
13 | * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation | 13 | * Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation |
14 | * Copyright (C) 2007 Intel Corp | ||
15 | * Fenghua Yu <fenghua.yu@intel.com> | ||
16 | * Add multiple ptc.g/ptc.ga instruction support in global tlb purge. | ||
14 | */ | 17 | */ |
15 | #include <linux/module.h> | 18 | #include <linux/module.h> |
16 | #include <linux/init.h> | 19 | #include <linux/init.h> |
@@ -26,6 +29,7 @@ | |||
26 | #include <asm/pal.h> | 29 | #include <asm/pal.h> |
27 | #include <asm/tlbflush.h> | 30 | #include <asm/tlbflush.h> |
28 | #include <asm/dma.h> | 31 | #include <asm/dma.h> |
32 | #include <asm/sal.h> | ||
29 | 33 | ||
30 | static struct { | 34 | static struct { |
31 | unsigned long mask; /* mask of supported purge page-sizes */ | 35 | unsigned long mask; /* mask of supported purge page-sizes */ |
@@ -84,14 +88,104 @@ wrap_mmu_context (struct mm_struct *mm) | |||
84 | local_flush_tlb_all(); | 88 | local_flush_tlb_all(); |
85 | } | 89 | } |
86 | 90 | ||
91 | /* | ||
92 | * Implement "spinaphores" ... like counting semaphores, but they | ||
93 | * spin instead of sleeping. If there are ever any other users for | ||
94 | * this primitive it can be moved up to a spinaphore.h header. | ||
95 | */ | ||
96 | struct spinaphore { | ||
97 | atomic_t cur; | ||
98 | }; | ||
99 | |||
100 | static inline void spinaphore_init(struct spinaphore *ss, int val) | ||
101 | { | ||
102 | atomic_set(&ss->cur, val); | ||
103 | } | ||
104 | |||
105 | static inline void down_spin(struct spinaphore *ss) | ||
106 | { | ||
107 | while (unlikely(!atomic_add_unless(&ss->cur, -1, 0))) | ||
108 | while (atomic_read(&ss->cur) == 0) | ||
109 | cpu_relax(); | ||
110 | } | ||
111 | |||
112 | static inline void up_spin(struct spinaphore *ss) | ||
113 | { | ||
114 | atomic_add(1, &ss->cur); | ||
115 | } | ||
116 | |||
117 | static struct spinaphore ptcg_sem; | ||
118 | static u16 nptcg = 1; | ||
119 | static int need_ptcg_sem = 1; | ||
120 | static int toolatetochangeptcgsem = 0; | ||
121 | |||
122 | /* | ||
123 | * Maximum number of simultaneous ptc.g purges in the system can | ||
124 | * be defined by PAL_VM_SUMMARY (in which case we should take | ||
125 | * the smallest value for any cpu in the system) or by the PAL | ||
126 | * override table (in which case we should ignore the value from | ||
127 | * PAL_VM_SUMMARY). | ||
128 | * | ||
129 | * Complicating the logic here is the fact that num_possible_cpus() | ||
130 | * isn't fully setup until we start bringing cpus online. | ||
131 | */ | ||
132 | void | ||
133 | setup_ptcg_sem(int max_purges, int from_palo) | ||
134 | { | ||
135 | static int have_palo; | ||
136 | static int firstcpu = 1; | ||
137 | |||
138 | if (toolatetochangeptcgsem) { | ||
139 | BUG_ON(max_purges < nptcg); | ||
140 | return; | ||
141 | } | ||
142 | |||
143 | if (from_palo) { | ||
144 | have_palo = 1; | ||
145 | |||
146 | /* In PALO max_purges == 0 really means it! */ | ||
147 | if (max_purges == 0) | ||
148 | panic("Whoa! Platform does not support global TLB purges.\n"); | ||
149 | nptcg = max_purges; | ||
150 | if (nptcg == PALO_MAX_TLB_PURGES) { | ||
151 | need_ptcg_sem = 0; | ||
152 | return; | ||
153 | } | ||
154 | goto resetsema; | ||
155 | } | ||
156 | if (have_palo) { | ||
157 | if (nptcg != PALO_MAX_TLB_PURGES) | ||
158 | need_ptcg_sem = (num_possible_cpus() > nptcg); | ||
159 | return; | ||
160 | } | ||
161 | |||
162 | /* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */ | ||
163 | if (max_purges == 0) max_purges = 1; | ||
164 | |||
165 | if (firstcpu) { | ||
166 | nptcg = max_purges; | ||
167 | firstcpu = 0; | ||
168 | } | ||
169 | if (max_purges < nptcg) | ||
170 | nptcg = max_purges; | ||
171 | if (nptcg == PAL_MAX_PURGES) { | ||
172 | need_ptcg_sem = 0; | ||
173 | return; | ||
174 | } else | ||
175 | need_ptcg_sem = (num_possible_cpus() > nptcg); | ||
176 | |||
177 | resetsema: | ||
178 | spinaphore_init(&ptcg_sem, max_purges); | ||
179 | } | ||
180 | |||
87 | void | 181 | void |
88 | ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, | 182 | ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, |
89 | unsigned long end, unsigned long nbits) | 183 | unsigned long end, unsigned long nbits) |
90 | { | 184 | { |
91 | static DEFINE_SPINLOCK(ptcg_lock); | ||
92 | |||
93 | struct mm_struct *active_mm = current->active_mm; | 185 | struct mm_struct *active_mm = current->active_mm; |
94 | 186 | ||
187 | toolatetochangeptcgsem = 1; | ||
188 | |||
95 | if (mm != active_mm) { | 189 | if (mm != active_mm) { |
96 | /* Restore region IDs for mm */ | 190 | /* Restore region IDs for mm */ |
97 | if (mm && active_mm) { | 191 | if (mm && active_mm) { |
@@ -102,19 +196,20 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, | |||
102 | } | 196 | } |
103 | } | 197 | } |
104 | 198 | ||
105 | /* HW requires global serialization of ptc.ga. */ | 199 | if (need_ptcg_sem) |
106 | spin_lock(&ptcg_lock); | 200 | down_spin(&ptcg_sem); |
107 | { | 201 | |
108 | do { | 202 | do { |
109 | /* | 203 | /* |
110 | * Flush ALAT entries also. | 204 | * Flush ALAT entries also. |
111 | */ | 205 | */ |
112 | ia64_ptcga(start, (nbits<<2)); | 206 | ia64_ptcga(start, (nbits << 2)); |
113 | ia64_srlz_i(); | 207 | ia64_srlz_i(); |
114 | start += (1UL << nbits); | 208 | start += (1UL << nbits); |
115 | } while (start < end); | 209 | } while (start < end); |
116 | } | 210 | |
117 | spin_unlock(&ptcg_lock); | 211 | if (need_ptcg_sem) |
212 | up_spin(&ptcg_sem); | ||
118 | 213 | ||
119 | if (mm != active_mm) { | 214 | if (mm != active_mm) { |
120 | activate_context(active_mm); | 215 | activate_context(active_mm); |