diff options
Diffstat (limited to 'arch/ia64/sn/kernel/xpc_partition.c')
-rw-r--r-- | arch/ia64/sn/kernel/xpc_partition.c | 984 |
1 files changed, 984 insertions, 0 deletions
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c new file mode 100644 index 000000000000..2c3c4a8af553 --- /dev/null +++ b/arch/ia64/sn/kernel/xpc_partition.c | |||
@@ -0,0 +1,984 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. | ||
7 | */ | ||
8 | |||
9 | |||
10 | /* | ||
11 | * Cross Partition Communication (XPC) partition support. | ||
12 | * | ||
13 | * This is the part of XPC that detects the presence/absence of | ||
14 | * other partitions. It provides a heartbeat and monitors the | ||
15 | * heartbeats of other partitions. | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | |||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/sysctl.h> | ||
22 | #include <linux/cache.h> | ||
23 | #include <linux/mmzone.h> | ||
24 | #include <linux/nodemask.h> | ||
25 | #include <asm/sn/bte.h> | ||
26 | #include <asm/sn/intr.h> | ||
27 | #include <asm/sn/sn_sal.h> | ||
28 | #include <asm/sn/nodepda.h> | ||
29 | #include <asm/sn/addrs.h> | ||
30 | #include "xpc.h" | ||
31 | |||
32 | |||
/* XPC is exiting flag */
int xpc_exiting;


/*
 * SH_IPI_ACCESS shub register value on startup.
 *
 * These are read from the local hub by xpc_allow_IPI_ops() before it opens
 * the registers up, and are written back to every online node by
 * xpc_restrict_IPI_ops().
 */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;


/*
 * original protection values for each node (only filled in by
 * xpc_allow_IPI_ops() when enable_shub_wars_1_1() is true)
 */
u64 xpc_prot_vec[MAX_COMPACT_NODES];


/* this partition's reserved page */
struct xpc_rsvd_page *xpc_rsvd_page;

/*
 * this partition's XPC variables (within the reserved page); both pointers
 * are set up by xpc_rsvd_page_init()
 */
struct xpc_vars *xpc_vars;
struct xpc_vars_part *xpc_vars_part;


/*
 * For performance reasons, each entry of xpc_partitions[] is cacheline
 * aligned. And xpc_partitions[] is padded with an additional entry at the
 * end so that the last legitimate entry doesn't share its cacheline with
 * another variable.
 */
struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];


/*
 * Generic buffer used to store a local copy of the remote partition's
 * reserved page or XPC variables.
 *
 * xpc_discovery runs only once and is a separate thread that is
 * very likely going to be processing in parallel with receiving
 * interrupts.  (xpc_discovery() therefore allocates its own buffer
 * instead of using this one.)
 */
char ____cacheline_aligned
		xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];


/* systune related variables */
int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
81 | |||
82 | |||
83 | /* | ||
84 | * Given a nasid, get the physical address of the partition's reserved page | ||
85 | * for that nasid. This function returns 0 on any error. | ||
86 | */ | ||
87 | static u64 | ||
88 | xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size) | ||
89 | { | ||
90 | bte_result_t bte_res; | ||
91 | s64 status; | ||
92 | u64 cookie = 0; | ||
93 | u64 rp_pa = nasid; /* seed with nasid */ | ||
94 | u64 len = 0; | ||
95 | |||
96 | |||
97 | while (1) { | ||
98 | |||
99 | status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa, | ||
100 | &len); | ||
101 | |||
102 | dev_dbg(xpc_part, "SAL returned with status=%li, cookie=" | ||
103 | "0x%016lx, address=0x%016lx, len=0x%016lx\n", | ||
104 | status, cookie, rp_pa, len); | ||
105 | |||
106 | if (status != SALRET_MORE_PASSES) { | ||
107 | break; | ||
108 | } | ||
109 | |||
110 | if (len > buf_size) { | ||
111 | dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len); | ||
112 | status = SALRET_ERROR; | ||
113 | break; | ||
114 | } | ||
115 | |||
116 | bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size, | ||
117 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); | ||
118 | if (bte_res != BTE_SUCCESS) { | ||
119 | dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); | ||
120 | status = SALRET_ERROR; | ||
121 | break; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | if (status != SALRET_OK) { | ||
126 | rp_pa = 0; | ||
127 | } | ||
128 | dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa); | ||
129 | return rp_pa; | ||
130 | } | ||
131 | |||
132 | |||
133 | /* | ||
134 | * Fill the partition reserved page with the information needed by | ||
135 | * other partitions to discover we are alive and establish initial | ||
136 | * communications. | ||
137 | */ | ||
138 | struct xpc_rsvd_page * | ||
139 | xpc_rsvd_page_init(void) | ||
140 | { | ||
141 | struct xpc_rsvd_page *rp; | ||
142 | AMO_t *amos_page; | ||
143 | u64 rp_pa, next_cl, nasid_array = 0; | ||
144 | int i, ret; | ||
145 | |||
146 | |||
147 | /* get the local reserved page's address */ | ||
148 | |||
149 | rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0), | ||
150 | (u64) xpc_remote_copy_buffer, | ||
151 | XPC_RSVD_PAGE_ALIGNED_SIZE); | ||
152 | if (rp_pa == 0) { | ||
153 | dev_err(xpc_part, "SAL failed to locate the reserved page\n"); | ||
154 | return NULL; | ||
155 | } | ||
156 | rp = (struct xpc_rsvd_page *) __va(rp_pa); | ||
157 | |||
158 | if (rp->partid != sn_partition_id) { | ||
159 | dev_err(xpc_part, "the reserved page's partid of %d should be " | ||
160 | "%d\n", rp->partid, sn_partition_id); | ||
161 | return NULL; | ||
162 | } | ||
163 | |||
164 | rp->version = XPC_RP_VERSION; | ||
165 | |||
166 | /* | ||
167 | * Place the XPC variables on the cache line following the | ||
168 | * reserved page structure. | ||
169 | */ | ||
170 | next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE; | ||
171 | xpc_vars = (struct xpc_vars *) next_cl; | ||
172 | |||
173 | /* | ||
174 | * Before clearing xpc_vars, see if a page of AMOs had been previously | ||
175 | * allocated. If not we'll need to allocate one and set permissions | ||
176 | * so that cross-partition AMOs are allowed. | ||
177 | * | ||
178 | * The allocated AMO page needs MCA reporting to remain disabled after | ||
179 | * XPC has unloaded. To make this work, we keep a copy of the pointer | ||
180 | * to this page (i.e., amos_page) in the struct xpc_vars structure, | ||
181 | * which is pointed to by the reserved page, and re-use that saved copy | ||
182 | * on subsequent loads of XPC. This AMO page is never freed, and its | ||
183 | * memory protections are never restricted. | ||
184 | */ | ||
185 | if ((amos_page = xpc_vars->amos_page) == NULL) { | ||
186 | amos_page = (AMO_t *) mspec_kalloc_page(0); | ||
187 | if (amos_page == NULL) { | ||
188 | dev_err(xpc_part, "can't allocate page of AMOs\n"); | ||
189 | return NULL; | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems | ||
194 | * when xpc_allow_IPI_ops() is called via xpc_hb_init(). | ||
195 | */ | ||
196 | if (!enable_shub_wars_1_1()) { | ||
197 | ret = sn_change_memprotect(ia64_tpa((u64) amos_page), | ||
198 | PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1, | ||
199 | &nasid_array); | ||
200 | if (ret != 0) { | ||
201 | dev_err(xpc_part, "can't change memory " | ||
202 | "protections\n"); | ||
203 | mspec_kfree_page((unsigned long) amos_page); | ||
204 | return NULL; | ||
205 | } | ||
206 | } | ||
207 | } else if (!IS_AMO_ADDRESS((u64) amos_page)) { | ||
208 | /* | ||
209 | * EFI's XPBOOT can also set amos_page in the reserved page, | ||
210 | * but it happens to leave it as an uncached physical address | ||
211 | * and we need it to be an uncached virtual, so we'll have to | ||
212 | * convert it. | ||
213 | */ | ||
214 | if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) { | ||
215 | dev_err(xpc_part, "previously used amos_page address " | ||
216 | "is bad = 0x%p\n", (void *) amos_page); | ||
217 | return NULL; | ||
218 | } | ||
219 | amos_page = (AMO_t *) TO_AMO((u64) amos_page); | ||
220 | } | ||
221 | |||
222 | memset(xpc_vars, 0, sizeof(struct xpc_vars)); | ||
223 | |||
224 | /* | ||
225 | * Place the XPC per partition specific variables on the cache line | ||
226 | * following the XPC variables structure. | ||
227 | */ | ||
228 | next_cl += XPC_VARS_ALIGNED_SIZE; | ||
229 | memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) * | ||
230 | XP_MAX_PARTITIONS); | ||
231 | xpc_vars_part = (struct xpc_vars_part *) next_cl; | ||
232 | xpc_vars->vars_part_pa = __pa(next_cl); | ||
233 | |||
234 | xpc_vars->version = XPC_V_VERSION; | ||
235 | xpc_vars->act_nasid = cpuid_to_nasid(0); | ||
236 | xpc_vars->act_phys_cpuid = cpu_physical_id(0); | ||
237 | xpc_vars->amos_page = amos_page; /* save for next load of XPC */ | ||
238 | |||
239 | |||
240 | /* | ||
241 | * Initialize the activation related AMO variables. | ||
242 | */ | ||
243 | xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS); | ||
244 | for (i = 1; i < XP_NASID_MASK_WORDS; i++) { | ||
245 | xpc_IPI_init(i + XP_MAX_PARTITIONS); | ||
246 | } | ||
247 | /* export AMO page's physical address to other partitions */ | ||
248 | xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page); | ||
249 | |||
250 | /* | ||
251 | * This signifies to the remote partition that our reserved | ||
252 | * page is initialized. | ||
253 | */ | ||
254 | (volatile u64) rp->vars_pa = __pa(xpc_vars); | ||
255 | |||
256 | return rp; | ||
257 | } | ||
258 | |||
259 | |||
260 | /* | ||
261 | * Change protections to allow IPI operations (and AMO operations on | ||
262 | * Shub 1.1 systems). | ||
263 | */ | ||
264 | void | ||
265 | xpc_allow_IPI_ops(void) | ||
266 | { | ||
267 | int node; | ||
268 | int nasid; | ||
269 | |||
270 | |||
271 | // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. | ||
272 | |||
273 | if (is_shub2()) { | ||
274 | xpc_sh2_IPI_access0 = | ||
275 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0)); | ||
276 | xpc_sh2_IPI_access1 = | ||
277 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1)); | ||
278 | xpc_sh2_IPI_access2 = | ||
279 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2)); | ||
280 | xpc_sh2_IPI_access3 = | ||
281 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3)); | ||
282 | |||
283 | for_each_online_node(node) { | ||
284 | nasid = cnodeid_to_nasid(node); | ||
285 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), | ||
286 | -1UL); | ||
287 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), | ||
288 | -1UL); | ||
289 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), | ||
290 | -1UL); | ||
291 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), | ||
292 | -1UL); | ||
293 | } | ||
294 | |||
295 | } else { | ||
296 | xpc_sh1_IPI_access = | ||
297 | (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS)); | ||
298 | |||
299 | for_each_online_node(node) { | ||
300 | nasid = cnodeid_to_nasid(node); | ||
301 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), | ||
302 | -1UL); | ||
303 | |||
304 | /* | ||
305 | * Since the BIST collides with memory operations on | ||
306 | * SHUB 1.1 sn_change_memprotect() cannot be used. | ||
307 | */ | ||
308 | if (enable_shub_wars_1_1()) { | ||
309 | /* open up everything */ | ||
310 | xpc_prot_vec[node] = (u64) HUB_L((u64 *) | ||
311 | GLOBAL_MMR_ADDR(nasid, | ||
312 | SH1_MD_DQLP_MMR_DIR_PRIVEC0)); | ||
313 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, | ||
314 | SH1_MD_DQLP_MMR_DIR_PRIVEC0), | ||
315 | -1UL); | ||
316 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, | ||
317 | SH1_MD_DQRP_MMR_DIR_PRIVEC0), | ||
318 | -1UL); | ||
319 | } | ||
320 | } | ||
321 | } | ||
322 | } | ||
323 | |||
324 | |||
325 | /* | ||
326 | * Restrict protections to disallow IPI operations (and AMO operations on | ||
327 | * Shub 1.1 systems). | ||
328 | */ | ||
329 | void | ||
330 | xpc_restrict_IPI_ops(void) | ||
331 | { | ||
332 | int node; | ||
333 | int nasid; | ||
334 | |||
335 | |||
336 | // >>> Change SH_IPI_ACCESS code to use SAL call once it is available. | ||
337 | |||
338 | if (is_shub2()) { | ||
339 | |||
340 | for_each_online_node(node) { | ||
341 | nasid = cnodeid_to_nasid(node); | ||
342 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0), | ||
343 | xpc_sh2_IPI_access0); | ||
344 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1), | ||
345 | xpc_sh2_IPI_access1); | ||
346 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2), | ||
347 | xpc_sh2_IPI_access2); | ||
348 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3), | ||
349 | xpc_sh2_IPI_access3); | ||
350 | } | ||
351 | |||
352 | } else { | ||
353 | |||
354 | for_each_online_node(node) { | ||
355 | nasid = cnodeid_to_nasid(node); | ||
356 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS), | ||
357 | xpc_sh1_IPI_access); | ||
358 | |||
359 | if (enable_shub_wars_1_1()) { | ||
360 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, | ||
361 | SH1_MD_DQLP_MMR_DIR_PRIVEC0), | ||
362 | xpc_prot_vec[node]); | ||
363 | HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, | ||
364 | SH1_MD_DQRP_MMR_DIR_PRIVEC0), | ||
365 | xpc_prot_vec[node]); | ||
366 | } | ||
367 | } | ||
368 | } | ||
369 | } | ||
370 | |||
371 | |||
372 | /* | ||
373 | * At periodic intervals, scan through all active partitions and ensure | ||
374 | * their heartbeat is still active. If not, the partition is deactivated. | ||
375 | */ | ||
376 | void | ||
377 | xpc_check_remote_hb(void) | ||
378 | { | ||
379 | struct xpc_vars *remote_vars; | ||
380 | struct xpc_partition *part; | ||
381 | partid_t partid; | ||
382 | bte_result_t bres; | ||
383 | |||
384 | |||
385 | remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; | ||
386 | |||
387 | for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { | ||
388 | if (partid == sn_partition_id) { | ||
389 | continue; | ||
390 | } | ||
391 | |||
392 | part = &xpc_partitions[partid]; | ||
393 | |||
394 | if (part->act_state == XPC_P_INACTIVE || | ||
395 | part->act_state == XPC_P_DEACTIVATING) { | ||
396 | continue; | ||
397 | } | ||
398 | |||
399 | /* pull the remote_hb cache line */ | ||
400 | bres = xp_bte_copy(part->remote_vars_pa, | ||
401 | ia64_tpa((u64) remote_vars), | ||
402 | XPC_VARS_ALIGNED_SIZE, | ||
403 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); | ||
404 | if (bres != BTE_SUCCESS) { | ||
405 | XPC_DEACTIVATE_PARTITION(part, | ||
406 | xpc_map_bte_errors(bres)); | ||
407 | continue; | ||
408 | } | ||
409 | |||
410 | dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" | ||
411 | " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid, | ||
412 | remote_vars->heartbeat, part->last_heartbeat, | ||
413 | remote_vars->kdb_status, | ||
414 | remote_vars->heartbeating_to_mask); | ||
415 | |||
416 | if (((remote_vars->heartbeat == part->last_heartbeat) && | ||
417 | (remote_vars->kdb_status == 0)) || | ||
418 | !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { | ||
419 | |||
420 | XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); | ||
421 | continue; | ||
422 | } | ||
423 | |||
424 | part->last_heartbeat = remote_vars->heartbeat; | ||
425 | } | ||
426 | } | ||
427 | |||
428 | |||
429 | /* | ||
430 | * Get a copy of the remote partition's rsvd page. | ||
431 | * | ||
432 | * remote_rp points to a buffer that is cacheline aligned for BTE copies and | ||
433 | * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE. | ||
434 | */ | ||
435 | static enum xpc_retval | ||
436 | xpc_get_remote_rp(int nasid, u64 *discovered_nasids, | ||
437 | struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa) | ||
438 | { | ||
439 | int bres, i; | ||
440 | |||
441 | |||
442 | /* get the reserved page's physical address */ | ||
443 | |||
444 | *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, | ||
445 | XPC_RSVD_PAGE_ALIGNED_SIZE); | ||
446 | if (*remote_rsvd_page_pa == 0) { | ||
447 | return xpcNoRsvdPageAddr; | ||
448 | } | ||
449 | |||
450 | |||
451 | /* pull over the reserved page structure */ | ||
452 | |||
453 | bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp), | ||
454 | XPC_RSVD_PAGE_ALIGNED_SIZE, | ||
455 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); | ||
456 | if (bres != BTE_SUCCESS) { | ||
457 | return xpc_map_bte_errors(bres); | ||
458 | } | ||
459 | |||
460 | |||
461 | if (discovered_nasids != NULL) { | ||
462 | for (i = 0; i < XP_NASID_MASK_WORDS; i++) { | ||
463 | discovered_nasids[i] |= remote_rp->part_nasids[i]; | ||
464 | } | ||
465 | } | ||
466 | |||
467 | |||
468 | /* check that the partid is for another partition */ | ||
469 | |||
470 | if (remote_rp->partid < 1 || | ||
471 | remote_rp->partid > (XP_MAX_PARTITIONS - 1)) { | ||
472 | return xpcInvalidPartid; | ||
473 | } | ||
474 | |||
475 | if (remote_rp->partid == sn_partition_id) { | ||
476 | return xpcLocalPartid; | ||
477 | } | ||
478 | |||
479 | |||
480 | if (XPC_VERSION_MAJOR(remote_rp->version) != | ||
481 | XPC_VERSION_MAJOR(XPC_RP_VERSION)) { | ||
482 | return xpcBadVersion; | ||
483 | } | ||
484 | |||
485 | return xpcSuccess; | ||
486 | } | ||
487 | |||
488 | |||
489 | /* | ||
490 | * Get a copy of the remote partition's XPC variables. | ||
491 | * | ||
492 | * remote_vars points to a buffer that is cacheline aligned for BTE copies and | ||
493 | * assumed to be of size XPC_VARS_ALIGNED_SIZE. | ||
494 | */ | ||
495 | static enum xpc_retval | ||
496 | xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) | ||
497 | { | ||
498 | int bres; | ||
499 | |||
500 | |||
501 | if (remote_vars_pa == 0) { | ||
502 | return xpcVarsNotSet; | ||
503 | } | ||
504 | |||
505 | |||
506 | /* pull over the cross partition variables */ | ||
507 | |||
508 | bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars), | ||
509 | XPC_VARS_ALIGNED_SIZE, | ||
510 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); | ||
511 | if (bres != BTE_SUCCESS) { | ||
512 | return xpc_map_bte_errors(bres); | ||
513 | } | ||
514 | |||
515 | if (XPC_VERSION_MAJOR(remote_vars->version) != | ||
516 | XPC_VERSION_MAJOR(XPC_V_VERSION)) { | ||
517 | return xpcBadVersion; | ||
518 | } | ||
519 | |||
520 | return xpcSuccess; | ||
521 | } | ||
522 | |||
523 | |||
524 | /* | ||
525 | * Prior code has determine the nasid which generated an IPI. Inspect | ||
526 | * that nasid to determine if its partition needs to be activated or | ||
527 | * deactivated. | ||
528 | * | ||
529 | * A partition is consider "awaiting activation" if our partition | ||
530 | * flags indicate it is not active and it has a heartbeat. A | ||
531 | * partition is considered "awaiting deactivation" if our partition | ||
532 | * flags indicate it is active but it has no heartbeat or it is not | ||
533 | * sending its heartbeat to us. | ||
534 | * | ||
535 | * To determine the heartbeat, the remote nasid must have a properly | ||
536 | * initialized reserved page. | ||
537 | */ | ||
538 | static void | ||
539 | xpc_identify_act_IRQ_req(int nasid) | ||
540 | { | ||
541 | struct xpc_rsvd_page *remote_rp; | ||
542 | struct xpc_vars *remote_vars; | ||
543 | u64 remote_rsvd_page_pa; | ||
544 | u64 remote_vars_pa; | ||
545 | partid_t partid; | ||
546 | struct xpc_partition *part; | ||
547 | enum xpc_retval ret; | ||
548 | |||
549 | |||
550 | /* pull over the reserved page structure */ | ||
551 | |||
552 | remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; | ||
553 | |||
554 | ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa); | ||
555 | if (ret != xpcSuccess) { | ||
556 | dev_warn(xpc_part, "unable to get reserved page from nasid %d, " | ||
557 | "which sent interrupt, reason=%d\n", nasid, ret); | ||
558 | return; | ||
559 | } | ||
560 | |||
561 | remote_vars_pa = remote_rp->vars_pa; | ||
562 | partid = remote_rp->partid; | ||
563 | part = &xpc_partitions[partid]; | ||
564 | |||
565 | |||
566 | /* pull over the cross partition variables */ | ||
567 | |||
568 | remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; | ||
569 | |||
570 | ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); | ||
571 | if (ret != xpcSuccess) { | ||
572 | |||
573 | dev_warn(xpc_part, "unable to get XPC variables from nasid %d, " | ||
574 | "which sent interrupt, reason=%d\n", nasid, ret); | ||
575 | |||
576 | XPC_DEACTIVATE_PARTITION(part, ret); | ||
577 | return; | ||
578 | } | ||
579 | |||
580 | |||
581 | part->act_IRQ_rcvd++; | ||
582 | |||
583 | dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = " | ||
584 | "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, | ||
585 | remote_vars->heartbeat, remote_vars->heartbeating_to_mask); | ||
586 | |||
587 | |||
588 | if (part->act_state == XPC_P_INACTIVE) { | ||
589 | |||
590 | part->remote_rp_pa = remote_rsvd_page_pa; | ||
591 | dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", | ||
592 | part->remote_rp_pa); | ||
593 | |||
594 | part->remote_vars_pa = remote_vars_pa; | ||
595 | dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", | ||
596 | part->remote_vars_pa); | ||
597 | |||
598 | part->last_heartbeat = remote_vars->heartbeat; | ||
599 | dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", | ||
600 | part->last_heartbeat); | ||
601 | |||
602 | part->remote_vars_part_pa = remote_vars->vars_part_pa; | ||
603 | dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", | ||
604 | part->remote_vars_part_pa); | ||
605 | |||
606 | part->remote_act_nasid = remote_vars->act_nasid; | ||
607 | dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", | ||
608 | part->remote_act_nasid); | ||
609 | |||
610 | part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; | ||
611 | dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n", | ||
612 | part->remote_act_phys_cpuid); | ||
613 | |||
614 | part->remote_amos_page_pa = remote_vars->amos_page_pa; | ||
615 | dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", | ||
616 | part->remote_amos_page_pa); | ||
617 | |||
618 | xpc_activate_partition(part); | ||
619 | |||
620 | } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa || | ||
621 | !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { | ||
622 | |||
623 | part->reactivate_nasid = nasid; | ||
624 | XPC_DEACTIVATE_PARTITION(part, xpcReactivating); | ||
625 | } | ||
626 | } | ||
627 | |||
628 | |||
629 | /* | ||
630 | * Loop through the activation AMO variables and process any bits | ||
631 | * which are set. Each bit indicates a nasid sending a partition | ||
632 | * activation or deactivation request. | ||
633 | * | ||
634 | * Return #of IRQs detected. | ||
635 | */ | ||
636 | int | ||
637 | xpc_identify_act_IRQ_sender(void) | ||
638 | { | ||
639 | int word, bit; | ||
640 | u64 nasid_mask; | ||
641 | u64 nasid; /* remote nasid */ | ||
642 | int n_IRQs_detected = 0; | ||
643 | AMO_t *act_amos; | ||
644 | struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page; | ||
645 | |||
646 | |||
647 | act_amos = xpc_vars->act_amos; | ||
648 | |||
649 | |||
650 | /* scan through act AMO variable looking for non-zero entries */ | ||
651 | for (word = 0; word < XP_NASID_MASK_WORDS; word++) { | ||
652 | |||
653 | nasid_mask = xpc_IPI_receive(&act_amos[word]); | ||
654 | if (nasid_mask == 0) { | ||
655 | /* no IRQs from nasids in this variable */ | ||
656 | continue; | ||
657 | } | ||
658 | |||
659 | dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word, | ||
660 | nasid_mask); | ||
661 | |||
662 | |||
663 | /* | ||
664 | * If this nasid has been added to the machine since | ||
665 | * our partition was reset, this will retain the | ||
666 | * remote nasid in our reserved pages machine mask. | ||
667 | * This is used in the event of module reload. | ||
668 | */ | ||
669 | rp->mach_nasids[word] |= nasid_mask; | ||
670 | |||
671 | |||
672 | /* locate the nasid(s) which sent interrupts */ | ||
673 | |||
674 | for (bit = 0; bit < (8 * sizeof(u64)); bit++) { | ||
675 | if (nasid_mask & (1UL << bit)) { | ||
676 | n_IRQs_detected++; | ||
677 | nasid = XPC_NASID_FROM_W_B(word, bit); | ||
678 | dev_dbg(xpc_part, "interrupt from nasid %ld\n", | ||
679 | nasid); | ||
680 | xpc_identify_act_IRQ_req(nasid); | ||
681 | } | ||
682 | } | ||
683 | } | ||
684 | return n_IRQs_detected; | ||
685 | } | ||
686 | |||
687 | |||
688 | /* | ||
689 | * Mark specified partition as active. | ||
690 | */ | ||
691 | enum xpc_retval | ||
692 | xpc_mark_partition_active(struct xpc_partition *part) | ||
693 | { | ||
694 | unsigned long irq_flags; | ||
695 | enum xpc_retval ret; | ||
696 | |||
697 | |||
698 | dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part)); | ||
699 | |||
700 | spin_lock_irqsave(&part->act_lock, irq_flags); | ||
701 | if (part->act_state == XPC_P_ACTIVATING) { | ||
702 | part->act_state = XPC_P_ACTIVE; | ||
703 | ret = xpcSuccess; | ||
704 | } else { | ||
705 | DBUG_ON(part->reason == xpcSuccess); | ||
706 | ret = part->reason; | ||
707 | } | ||
708 | spin_unlock_irqrestore(&part->act_lock, irq_flags); | ||
709 | |||
710 | return ret; | ||
711 | } | ||
712 | |||
713 | |||
714 | /* | ||
715 | * Notify XPC that the partition is down. | ||
716 | */ | ||
717 | void | ||
718 | xpc_deactivate_partition(const int line, struct xpc_partition *part, | ||
719 | enum xpc_retval reason) | ||
720 | { | ||
721 | unsigned long irq_flags; | ||
722 | partid_t partid = XPC_PARTID(part); | ||
723 | |||
724 | |||
725 | spin_lock_irqsave(&part->act_lock, irq_flags); | ||
726 | |||
727 | if (part->act_state == XPC_P_INACTIVE) { | ||
728 | XPC_SET_REASON(part, reason, line); | ||
729 | spin_unlock_irqrestore(&part->act_lock, irq_flags); | ||
730 | if (reason == xpcReactivating) { | ||
731 | /* we interrupt ourselves to reactivate partition */ | ||
732 | xpc_IPI_send_reactivate(part); | ||
733 | } | ||
734 | return; | ||
735 | } | ||
736 | if (part->act_state == XPC_P_DEACTIVATING) { | ||
737 | if ((part->reason == xpcUnloading && reason != xpcUnloading) || | ||
738 | reason == xpcReactivating) { | ||
739 | XPC_SET_REASON(part, reason, line); | ||
740 | } | ||
741 | spin_unlock_irqrestore(&part->act_lock, irq_flags); | ||
742 | return; | ||
743 | } | ||
744 | |||
745 | part->act_state = XPC_P_DEACTIVATING; | ||
746 | XPC_SET_REASON(part, reason, line); | ||
747 | |||
748 | spin_unlock_irqrestore(&part->act_lock, irq_flags); | ||
749 | |||
750 | XPC_DISALLOW_HB(partid, xpc_vars); | ||
751 | |||
752 | dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid, | ||
753 | reason); | ||
754 | |||
755 | xpc_partition_down(part, reason); | ||
756 | } | ||
757 | |||
758 | |||
759 | /* | ||
760 | * Mark specified partition as active. | ||
761 | */ | ||
762 | void | ||
763 | xpc_mark_partition_inactive(struct xpc_partition *part) | ||
764 | { | ||
765 | unsigned long irq_flags; | ||
766 | |||
767 | |||
768 | dev_dbg(xpc_part, "setting partition %d to INACTIVE\n", | ||
769 | XPC_PARTID(part)); | ||
770 | |||
771 | spin_lock_irqsave(&part->act_lock, irq_flags); | ||
772 | part->act_state = XPC_P_INACTIVE; | ||
773 | spin_unlock_irqrestore(&part->act_lock, irq_flags); | ||
774 | part->remote_rp_pa = 0; | ||
775 | } | ||
776 | |||
777 | |||
778 | /* | ||
779 | * SAL has provided a partition and machine mask. The partition mask | ||
780 | * contains a bit for each even nasid in our partition. The machine | ||
781 | * mask contains a bit for each even nasid in the entire machine. | ||
782 | * | ||
783 | * Using those two bit arrays, we can determine which nasids are | ||
784 | * known in the machine. Each should also have a reserved page | ||
785 | * initialized if they are available for partitioning. | ||
786 | */ | ||
787 | void | ||
788 | xpc_discovery(void) | ||
789 | { | ||
790 | void *remote_rp_base; | ||
791 | struct xpc_rsvd_page *remote_rp; | ||
792 | struct xpc_vars *remote_vars; | ||
793 | u64 remote_rsvd_page_pa; | ||
794 | u64 remote_vars_pa; | ||
795 | int region; | ||
796 | int max_regions; | ||
797 | int nasid; | ||
798 | struct xpc_rsvd_page *rp; | ||
799 | partid_t partid; | ||
800 | struct xpc_partition *part; | ||
801 | u64 *discovered_nasids; | ||
802 | enum xpc_retval ret; | ||
803 | |||
804 | |||
805 | remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE, | ||
806 | GFP_KERNEL, &remote_rp_base); | ||
807 | if (remote_rp == NULL) { | ||
808 | return; | ||
809 | } | ||
810 | remote_vars = (struct xpc_vars *) remote_rp; | ||
811 | |||
812 | |||
813 | discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS, | ||
814 | GFP_KERNEL); | ||
815 | if (discovered_nasids == NULL) { | ||
816 | kfree(remote_rp_base); | ||
817 | return; | ||
818 | } | ||
819 | memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS); | ||
820 | |||
821 | rp = (struct xpc_rsvd_page *) xpc_rsvd_page; | ||
822 | |||
823 | /* | ||
824 | * The term 'region' in this context refers to the minimum number of | ||
825 | * nodes that can comprise an access protection grouping. The access | ||
826 | * protection is in regards to memory, IOI and IPI. | ||
827 | */ | ||
828 | //>>> move the next two #defines into either include/asm-ia64/sn/arch.h or | ||
829 | //>>> include/asm-ia64/sn/addrs.h | ||
830 | #define SH1_MAX_REGIONS 64 | ||
831 | #define SH2_MAX_REGIONS 256 | ||
832 | max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS; | ||
833 | |||
834 | for (region = 0; region < max_regions; region++) { | ||
835 | |||
836 | if ((volatile int) xpc_exiting) { | ||
837 | break; | ||
838 | } | ||
839 | |||
840 | dev_dbg(xpc_part, "searching region %d\n", region); | ||
841 | |||
842 | for (nasid = (region * sn_region_size * 2); | ||
843 | nasid < ((region + 1) * sn_region_size * 2); | ||
844 | nasid += 2) { | ||
845 | |||
846 | if ((volatile int) xpc_exiting) { | ||
847 | break; | ||
848 | } | ||
849 | |||
850 | dev_dbg(xpc_part, "checking nasid %d\n", nasid); | ||
851 | |||
852 | |||
853 | if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) { | ||
854 | dev_dbg(xpc_part, "PROM indicates Nasid %d is " | ||
855 | "part of the local partition; skipping " | ||
856 | "region\n", nasid); | ||
857 | break; | ||
858 | } | ||
859 | |||
860 | if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) { | ||
861 | dev_dbg(xpc_part, "PROM indicates Nasid %d was " | ||
862 | "not on Numa-Link network at reset\n", | ||
863 | nasid); | ||
864 | continue; | ||
865 | } | ||
866 | |||
867 | if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) { | ||
868 | dev_dbg(xpc_part, "Nasid %d is part of a " | ||
869 | "partition which was previously " | ||
870 | "discovered\n", nasid); | ||
871 | continue; | ||
872 | } | ||
873 | |||
874 | |||
875 | /* pull over the reserved page structure */ | ||
876 | |||
877 | ret = xpc_get_remote_rp(nasid, discovered_nasids, | ||
878 | remote_rp, &remote_rsvd_page_pa); | ||
879 | if (ret != xpcSuccess) { | ||
880 | dev_dbg(xpc_part, "unable to get reserved page " | ||
881 | "from nasid %d, reason=%d\n", nasid, | ||
882 | ret); | ||
883 | |||
884 | if (ret == xpcLocalPartid) { | ||
885 | break; | ||
886 | } | ||
887 | continue; | ||
888 | } | ||
889 | |||
890 | remote_vars_pa = remote_rp->vars_pa; | ||
891 | |||
892 | partid = remote_rp->partid; | ||
893 | part = &xpc_partitions[partid]; | ||
894 | |||
895 | |||
896 | /* pull over the cross partition variables */ | ||
897 | |||
898 | ret = xpc_get_remote_vars(remote_vars_pa, remote_vars); | ||
899 | if (ret != xpcSuccess) { | ||
900 | dev_dbg(xpc_part, "unable to get XPC variables " | ||
901 | "from nasid %d, reason=%d\n", nasid, | ||
902 | ret); | ||
903 | |||
904 | XPC_DEACTIVATE_PARTITION(part, ret); | ||
905 | continue; | ||
906 | } | ||
907 | |||
908 | if (part->act_state != XPC_P_INACTIVE) { | ||
909 | dev_dbg(xpc_part, "partition %d on nasid %d is " | ||
910 | "already activating\n", partid, nasid); | ||
911 | break; | ||
912 | } | ||
913 | |||
914 | /* | ||
915 | * Register the remote partition's AMOs with SAL so it | ||
916 | * can handle and cleanup errors within that address | ||
917 | * range should the remote partition go down. We don't | ||
918 | * unregister this range because it is difficult to | ||
919 | * tell when outstanding writes to the remote partition | ||
920 | * are finished and thus when it is thus safe to | ||
921 | * unregister. This should not result in wasted space | ||
922 | * in the SAL xp_addr_region table because we should | ||
923 | * get the same page for remote_act_amos_pa after | ||
924 | * module reloads and system reboots. | ||
925 | */ | ||
926 | if (sn_register_xp_addr_region( | ||
927 | remote_vars->amos_page_pa, | ||
928 | PAGE_SIZE, 1) < 0) { | ||
929 | dev_dbg(xpc_part, "partition %d failed to " | ||
930 | "register xp_addr region 0x%016lx\n", | ||
931 | partid, remote_vars->amos_page_pa); | ||
932 | |||
933 | XPC_SET_REASON(part, xpcPhysAddrRegFailed, | ||
934 | __LINE__); | ||
935 | break; | ||
936 | } | ||
937 | |||
938 | /* | ||
939 | * The remote nasid is valid and available. | ||
940 | * Send an interrupt to that nasid to notify | ||
941 | * it that we are ready to begin activation. | ||
942 | */ | ||
943 | dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, " | ||
944 | "nasid %d, phys_cpuid 0x%x\n", | ||
945 | remote_vars->amos_page_pa, | ||
946 | remote_vars->act_nasid, | ||
947 | remote_vars->act_phys_cpuid); | ||
948 | |||
949 | xpc_IPI_send_activate(remote_vars); | ||
950 | } | ||
951 | } | ||
952 | |||
953 | kfree(discovered_nasids); | ||
954 | kfree(remote_rp_base); | ||
955 | } | ||
956 | |||
957 | |||
958 | /* | ||
959 | * Given a partid, get the nasids owned by that partition from the | ||
960 | * remote partition's reserved page. | ||
961 | */ | ||
962 | enum xpc_retval | ||
963 | xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask) | ||
964 | { | ||
965 | struct xpc_partition *part; | ||
966 | u64 part_nasid_pa; | ||
967 | int bte_res; | ||
968 | |||
969 | |||
970 | part = &xpc_partitions[partid]; | ||
971 | if (part->remote_rp_pa == 0) { | ||
972 | return xpcPartitionDown; | ||
973 | } | ||
974 | |||
975 | part_nasid_pa = part->remote_rp_pa + | ||
976 | (u64) &((struct xpc_rsvd_page *) 0)->part_nasids; | ||
977 | |||
978 | bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask), | ||
979 | L1_CACHE_ALIGN(XP_NASID_MASK_BYTES), | ||
980 | (BTE_NOTIFY | BTE_WACQUIRE), NULL); | ||
981 | |||
982 | return xpc_map_bte_errors(bte_res); | ||
983 | } | ||
984 | |||