Diffstat (limited to 'arch/ia64/sn/kernel/sn2/sn2_smp.c')
-rw-r--r--	arch/ia64/sn/kernel/sn2/sn2_smp.c	295
1 file changed, 295 insertions, 0 deletions
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
new file mode 100644
index 000000000000..7af05a7ac743
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -0,0 +1,295 @@
/*
 * SN2 Platform specific SMP Support
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/threads.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/nodemask.h>

#include <asm/processor.h>
#include <asm/irq.h>
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/delay.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/tlb.h>
#include <asm/numa.h>
#include <asm/hw_irq.h>
#include <asm/current.h>
#include <asm/sn/sn_cpuid.h>
#include <asm/sn/sn_sal.h>
#include <asm/sn/addrs.h>
#include <asm/sn/shub_mmr.h>
#include <asm/sn/nodepda.h>
#include <asm/sn/rw_mmr.h>

void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0,
	volatile unsigned long *, unsigned long data1);

static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);

static unsigned long sn2_ptc_deadlock_count;

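/*
 * Spin until this cpu's SHub PIO write status MMR shows that all
 * outstanding PIO writes have drained, then return the raw status word
 * (the caller checks it for the write-deadlock bit).
 */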
static inline unsigned long wait_piowc(void)
{
	volatile unsigned long *piows, zeroval;
	unsigned long ws;

	piows = pda->pio_write_status_addr;
	zeroval = pda->pio_write_status_val;
	do {
		cpu_relax();
	} while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
	return ws;
}

void sn_tlb_migrate_finish(struct mm_struct *mm)
{
	if (mm == current->mm)
		flush_tlb_mm(mm);
}

/**
 * sn2_global_tlb_purge - globally purge translation cache of virtual address range
 * @start: start of virtual address range
 * @end: end of virtual address range
 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
 *
 * Purges the translation caches of all processors of the given virtual address
 * range.
 *
 * Note:
 *	- cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
 *	- cpu_vm_mask is converted into a nodemask of the nodes containing the
 *	  cpus in cpu_vm_mask.
 *	- if only one bit is set in cpu_vm_mask & it is the current cpu,
 *	  then only the local TLB needs to be flushed. This flushing can be done
 *	  using ptc.l. This is the common case & avoids the global spinlock.
 *	- if multiple cpus have loaded the context, then flushing has to be
 *	  done with ptc.g/MMRs under protection of the global ptc_lock.
 */

void
sn2_global_tlb_purge(unsigned long start, unsigned long end,
		     unsigned long nbits)
{
	int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
	volatile unsigned long *ptc0, *ptc1;
	unsigned long flags = 0, data0 = 0, data1 = 0;
	struct mm_struct *mm = current->active_mm;
	short nasids[MAX_NUMNODES], nix;
	nodemask_t nodes_flushed;

	nodes_clear(nodes_flushed);
	i = 0;

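	/*
	 * Note every node that has a cpu with this context loaded, count
	 * the cpus, and remember the last cpu seen.
	 */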
	for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
		cnode = cpu_to_node(cpu);
		node_set(cnode, nodes_flushed);
		lcpu = cpu;
		i++;
	}

	preempt_disable();

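	/*
	 * Common case: this cpu is the only one with the context loaded,
	 * so a local ptc.l per page suffices and the global ptc lock is
	 * avoided entirely.
	 */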
	if (likely(i == 1 && lcpu == smp_processor_id())) {
		do {
			ia64_ptcl(start, nbits << 2);
			start += (1UL << nbits);
		} while (start < end);
		ia64_srlz_i();
		preempt_enable();
		return;
	}

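	/*
	 * Only one user of the mm: flush_tlb_mm() is cheaper than a
	 * global hardware purge here (on ia64 it effectively assigns the
	 * mm a fresh context number rather than purging entries).
	 */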
	if (atomic_read(&mm->mm_users) == 1) {
		flush_tlb_mm(mm);
		preempt_enable();
		return;
	}

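	/*
	 * Translate the node mask into the hardware NASID numbers that
	 * the PTC MMRs are addressed by.
	 */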
	nix = 0;
	for_each_node_mask(cnode, nodes_flushed)
		nasids[nix++] = cnodeid_to_nasid(cnode);
	shub1 = is_shub1();
	if (shub1) {
		data0 = (1UL << SH1_PTC_0_A_SHFT) |
			(nbits << SH1_PTC_0_PS_SHFT) |
			((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) |
			(1UL << SH1_PTC_0_START_SHFT);
		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
		ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
	} else {
		data0 = (1UL << SH2_PTC_A_SHFT) |
			(nbits << SH2_PTC_PS_SHFT) |
			(1UL << SH2_PTC_START_SHFT);
		ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
			((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT));
		ptc1 = NULL;
	}

	mynasid = get_nasid();

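	/*
	 * All global purges are serialized by sn2_global_ptc_lock:
	 * concurrent PTC MMR writes from multiple nodes can deadlock the
	 * SHub, and the recovery path below relies on this ordering.
	 */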
	spin_lock_irqsave(&sn2_global_ptc_lock, flags);

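	/*
	 * One iteration per page in [start, end): purge locally with
	 * ptc.ga on this node, and start the purge through the PTC MMRs
	 * on every other node in the list.
	 */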
	do {
		if (shub1)
			data1 = start | (1UL << SH1_PTC_1_START_SHFT);
		else
			data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
		for (i = 0; i < nix; i++) {
			nasid = nasids[i];
			if (unlikely(nasid == mynasid)) {
				ia64_ptcga(start, nbits << 2);
				ia64_srlz_i();
			} else {
				ptc0 = CHANGE_NASID(nasid, ptc0);
				if (ptc1)
					ptc1 = CHANGE_NASID(nasid, ptc1);
				pio_atomic_phys_write_mmrs(ptc0, data0, ptc1, data1);
				flushed = 1;
			}
		}

		if (flushed &&
		    (wait_piowc() & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) {
			sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1);
		}

		start += (1UL << nbits);

	} while (start < end);

	spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);

	preempt_enable();
}

/*
 * sn2_ptc_deadlock_recovery
 *
 * Recover from PTC deadlock conditions. Recovery requires stepping through
 * each TLB flush transaction. The recovery sequence is somewhat tricky and
 * is coded in assembly language.
 */
void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0,
	volatile unsigned long *ptc1, unsigned long data1)
{
	extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
		volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
	int cnode, mycnode, nasid;
	volatile unsigned long *piows;
	volatile unsigned long zeroval;

	sn2_ptc_deadlock_count++;

	piows = pda->pio_write_status_addr;
	zeroval = pda->pio_write_status_val;

	mycnode = numa_node_id();

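	/*
	 * Re-drive the purge MMR writes on every other populated node
	 * via the assembly-coded recovery sequence.
	 */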
	for_each_online_node(cnode) {
		if (is_headless_node(cnode) || cnode == mycnode)
			continue;
		nasid = cnodeid_to_nasid(cnode);
		ptc0 = CHANGE_NASID(nasid, ptc0);
		if (ptc1)
			ptc1 = CHANGE_NASID(nasid, ptc1);
		sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
	}
}

/**
 * sn_send_IPI_phys - send an IPI to a Nasid and slice
 * @nasid: nasid to receive the interrupt (may be outside partition)
 * @physid: physical cpuid to receive the interrupt.
 * @vector: command to send
 * @delivery_mode: delivery mechanism
 *
 * Sends an IPI (interprocessor interrupt) to the processor specified by
 * @physid
 *
 * @delivery_mode can be one of the following
 *
 * %IA64_IPI_DM_INT - pend an interrupt
 * %IA64_IPI_DM_PMI - pend a PMI
 * %IA64_IPI_DM_NMI - pend an NMI
 * %IA64_IPI_DM_INIT - pend an INIT interrupt
 */
void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
{
	long val;
	unsigned long flags = 0;
	volatile long *p;

	p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
	val = (1UL << SH_IPI_INT_SEND_SHFT) |
		(physid << SH_IPI_INT_PID_SHFT) |
		((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) |
		((long)vector << SH_IPI_INT_IDX_SHFT) |
		(0x000feeUL << SH_IPI_INT_BASE_SHFT);

	mb();
	if (enable_shub_wars_1_1()) {
		/* SHub 1.1 workaround: serialize the IPI MMR write with
		 * global PTC operations (they share sn2_global_ptc_lock)
		 * and wait for the write to drain before unlocking. */
		spin_lock_irqsave(&sn2_global_ptc_lock, flags);
	}
	pio_phys_write_mmr(p, val);
	if (enable_shub_wars_1_1()) {
		wait_piowc();
		spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
	}
}

EXPORT_SYMBOL(sn_send_IPI_phys);

/**
 * sn2_send_IPI - send an IPI to a processor
 * @cpuid: target of the IPI
 * @vector: command to send
 * @delivery_mode: delivery mechanism
 * @redirect: redirect the IPI?
 *
 * Sends an IPI (InterProcessor Interrupt) to the processor specified by
 * @cpuid.  @vector specifies the command to send, while @delivery_mode can
 * be one of the following
 *
 * %IA64_IPI_DM_INT - pend an interrupt
 * %IA64_IPI_DM_PMI - pend a PMI
 * %IA64_IPI_DM_NMI - pend an NMI
 * %IA64_IPI_DM_INIT - pend an INIT interrupt
 */
void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
{
	long physid;
	int nasid;

	physid = cpu_physical_id(cpuid);
	nasid = cpuid_to_nasid(cpuid);

	/* the following is used only when starting cpus at boot time */
	if (unlikely(nasid == -1))
		ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);

	sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
}
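
For context, a minimal sketch of how a caller might invoke sn2_send_IPI() to pend an ordinary interrupt on another cpu. This is illustrative only and not part of the commit: example_kick_cpu() is a hypothetical helper, and the choice of IA64_IPI_RESCHEDULE as the vector is an assumption. On ia64, platform IPI routines such as this one are normally reached through the machine vector rather than called directly.

/* Hypothetical caller -- not part of this commit. Assumes the
 * IA64_IPI_RESCHEDULE and IA64_IPI_DM_INT definitions from
 * <asm/hw_irq.h>. */
#include <asm/hw_irq.h>

static void example_kick_cpu(int cpu)
{
	/* Pend a normal interrupt (IA64_IPI_DM_INT) with the reschedule
	 * vector on the target cpu; no redirection requested. */
	sn2_send_IPI(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}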