aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/sn/kernel/xpc_partition.c
diff options
context:
space:
mode:
authorDean Nelson <dcn@sgi.com>2005-03-23 21:50:00 -0500
committerTony Luck <tony.luck@intel.com>2005-05-03 15:36:00 -0400
commit89eb8eb927e324366c3ac0458998aaf9953fc5cd (patch)
treec5f77d88bc42821134de6ea49a5663654df38e56 /arch/ia64/sn/kernel/xpc_partition.c
parent21223a9e78050919499d3d9039170e608eb939cc (diff)
[IA64-SGI] SGI Altix cross partition functionality [2/3]
This patch contains the communication module (XPC) for cross partition communication on a partitioned SGI Altix. Signed-off-by: Dean Nelson <dcn@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/sn/kernel/xpc_partition.c')
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c971
1 files changed, 971 insertions, 0 deletions
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
new file mode 100644
index 000000000000..b31d9988a37a
--- /dev/null
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -0,0 +1,971 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * Cross Partition Communication (XPC) partition support.
12 *
13 * This is the part of XPC that detects the presence/absence of
14 * other partitions. It provides a heartbeat and monitors the
15 * heartbeats of other partitions.
16 *
17 */
18
19
20#include <linux/kernel.h>
21#include <linux/sysctl.h>
22#include <linux/cache.h>
23#include <linux/mmzone.h>
24#include <linux/nodemask.h>
25#include <asm/sn/bte.h>
26#include <asm/sn/intr.h>
27#include <asm/sn/sn_sal.h>
28#include <asm/sn/nodepda.h>
29#include <asm/sn/addrs.h>
30#include "xpc.h"
31
32
33/* XPC is exiting flag; xpc_discovery() checks it to stop scanning early */
34int xpc_exiting;
35
36
37/* SH_IPI_ACCESS shub register value on startup; saved by xpc_allow_IPI_ops() and written back by xpc_restrict_IPI_ops() */
38static u64 xpc_sh1_IPI_access;
39static u64 xpc_sh2_IPI_access0;
40static u64 xpc_sh2_IPI_access1;
41static u64 xpc_sh2_IPI_access2;
42static u64 xpc_sh2_IPI_access3;
43
44
45/* original protection values for each node (recorded by xpc_allow_IPI_ops() on Shub 1.1 systems) */
46u64 xpc_prot_vec[MAX_COMPACT_NODES];
47
48
49/* this partition's reserved page */
50struct xpc_rsvd_page *xpc_rsvd_page;
51
52/* this partition's XPC variables (within the reserved page); both pointers are set by xpc_rsvd_page_init() */
53struct xpc_vars *xpc_vars;
54struct xpc_vars_part *xpc_vars_part;
55
56
57/*
58 * For performance reasons, each entry of xpc_partitions[] is cacheline
59 * aligned. And xpc_partitions[] is padded with an additional entry at the
60 * end so that the last legitimate entry doesn't share its cacheline with
61 * another variable.
62 */
63struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
64
65
66/*
67 * Generic buffer used to store a local copy of the remote partition's
68 * reserved page or XPC variables.
69 *
70 * xpc_discovery runs only once and is a separate thread that is
71 * very likely going to be processing in parallel with receiving
72 * interrupts, so it allocates its own buffer rather than using this one.
73 */
74char ____cacheline_aligned
75 xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
76
77
78/* systune related variables */
79int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
80int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
81
82
83/*
84 * Given a nasid, get the physical address of the partition's reserved page
85 * for that nasid. This function returns 0 on any error.
86 */
87static u64
88xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
89{
90 bte_result_t bte_res;
91 s64 status;
92 u64 cookie = 0;
93 u64 rp_pa = nasid; /* seed with nasid */
94 u64 len = 0;
95
96
97 while (1) {
98
99 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
100 &len);
101
102 dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
103 "0x%016lx, address=0x%016lx, len=0x%016lx\n",
104 status, cookie, rp_pa, len);
105
106 if (status != SALRET_MORE_PASSES) {
107 break;
108 }
109
110 if (len > buf_size) {
111 dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len);
112 status = SALRET_ERROR;
113 break;
114 }
115
116 bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size,
117 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
118 if (bte_res != BTE_SUCCESS) {
119 dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
120 status = SALRET_ERROR;
121 break;
122 }
123 }
124
125 if (status != SALRET_OK) {
126 rp_pa = 0;
127 }
128 dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
129 return rp_pa;
130}
131
132
133/*
134 * Fill the partition reserved page with the information needed by
135 * other partitions to discover we are alive and establish initial
136 * communications.
137 */
138struct xpc_rsvd_page *
139xpc_rsvd_page_init(void)
140{
141 struct xpc_rsvd_page *rp;
142 AMO_t *amos_page;
143 u64 rp_pa, next_cl, nasid_array = 0;
144 int i, ret;
145
146
147 /* get the local reserved page's address */
148
149 rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
150 (u64) xpc_remote_copy_buffer,
151 XPC_RSVD_PAGE_ALIGNED_SIZE);
152 if (rp_pa == 0) {
153 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
154 return NULL;
155 }
156 rp = (struct xpc_rsvd_page *) __va(rp_pa);
157
158 if (rp->partid != sn_partition_id) {
159 dev_err(xpc_part, "the reserved page's partid of %d should be "
160 "%d\n", rp->partid, sn_partition_id);
161 return NULL;
162 }
163
164 rp->version = XPC_RP_VERSION;
165
166 /*
167 * Place the XPC variables on the cache line following the
168 * reserved page structure.
169 */
170 next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
171 xpc_vars = (struct xpc_vars *) next_cl;
172
173 /*
174 * Before clearing xpc_vars, see if a page of AMOs had been previously
175 * allocated. If not we'll need to allocate one and set permissions
176 * so that cross-partition AMOs are allowed.
177 *
178 * The allocated AMO page needs MCA reporting to remain disabled after
179 * XPC has unloaded. To make this work, we keep a copy of the pointer
180 * to this page (i.e., amos_page) in the struct xpc_vars structure,
181 * which is pointed to by the reserved page, and re-use that saved copy
182 * on subsequent loads of XPC. This AMO page is never freed, and its
183 * memory protections are never restricted.
184 */
185 if ((amos_page = xpc_vars->amos_page) == NULL) {
186 amos_page = (AMO_t *) mspec_kalloc_page(0);
187 if (amos_page == NULL) {
188 dev_err(xpc_part, "can't allocate page of AMOs\n");
189 return NULL;
190 }
191
192 /*
193 * Open up AMO-R/W to cpu. This is done for Shub 1.1 systems
194 * when xpc_allow_IPI_ops() is called via xpc_hb_init().
195 */
196 if (!enable_shub_wars_1_1()) {
197 ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
198 PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
199 &nasid_array);
200 if (ret != 0) {
201 dev_err(xpc_part, "can't change memory "
202 "protections\n");
203 mspec_kfree_page((unsigned long) amos_page);
204 return NULL;
205 }
206 }
207 }
208
209 memset(xpc_vars, 0, sizeof(struct xpc_vars));
210
211 /*
212 * Place the XPC per partition specific variables on the cache line
213 * following the XPC variables structure.
214 */
215 next_cl += XPC_VARS_ALIGNED_SIZE;
216 memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
217 XP_MAX_PARTITIONS);
218 xpc_vars_part = (struct xpc_vars_part *) next_cl;
219 xpc_vars->vars_part_pa = __pa(next_cl);
220
221 xpc_vars->version = XPC_V_VERSION;
222 xpc_vars->act_nasid = cpuid_to_nasid(0);
223 xpc_vars->act_phys_cpuid = cpu_physical_id(0);
224 xpc_vars->amos_page = amos_page; /* save for next load of XPC */
225
226
227 /*
228 * Initialize the activation related AMO variables.
229 */
230 xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
231 for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
232 xpc_IPI_init(i + XP_MAX_PARTITIONS);
233 }
234 /* export AMO page's physical address to other partitions */
235 xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
236
237 /*
238 * This signifies to the remote partition that our reserved
239 * page is initialized.
240 */
241 (volatile u64) rp->vars_pa = __pa(xpc_vars);
242
243 return rp;
244}
245
246
247/*
248 * Change protections to allow IPI operations (and AMO operations on
249 * Shub 1.1 systems).
250 */
251void
252xpc_allow_IPI_ops(void)
253{
254 int node;
255 int nasid;
256
257
258 // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
259
260 if (is_shub2()) {
261 xpc_sh2_IPI_access0 =
262 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
263 xpc_sh2_IPI_access1 =
264 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
265 xpc_sh2_IPI_access2 =
266 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
267 xpc_sh2_IPI_access3 =
268 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
269
270 for_each_online_node(node) {
271 nasid = cnodeid_to_nasid(node);
272 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
273 -1UL);
274 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
275 -1UL);
276 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
277 -1UL);
278 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
279 -1UL);
280 }
281
282 } else {
283 xpc_sh1_IPI_access =
284 (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
285
286 for_each_online_node(node) {
287 nasid = cnodeid_to_nasid(node);
288 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
289 -1UL);
290
291 /*
292 * Since the BIST collides with memory operations on
293 * SHUB 1.1 sn_change_memprotect() cannot be used.
294 */
295 if (enable_shub_wars_1_1()) {
296 /* open up everything */
297 xpc_prot_vec[node] = (u64) HUB_L((u64 *)
298 GLOBAL_MMR_ADDR(nasid,
299 SH1_MD_DQLP_MMR_DIR_PRIVEC0));
300 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
301 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
302 -1UL);
303 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
304 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
305 -1UL);
306 }
307 }
308 }
309}
310
311
312/*
313 * Restrict protections to disallow IPI operations (and AMO operations on
314 * Shub 1.1 systems).
315 */
316void
317xpc_restrict_IPI_ops(void)
318{
319 int node;
320 int nasid;
321
322
323 // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
324
325 if (is_shub2()) {
326
327 for_each_online_node(node) {
328 nasid = cnodeid_to_nasid(node);
329 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
330 xpc_sh2_IPI_access0);
331 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
332 xpc_sh2_IPI_access1);
333 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
334 xpc_sh2_IPI_access2);
335 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
336 xpc_sh2_IPI_access3);
337 }
338
339 } else {
340
341 for_each_online_node(node) {
342 nasid = cnodeid_to_nasid(node);
343 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
344 xpc_sh1_IPI_access);
345
346 if (enable_shub_wars_1_1()) {
347 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
348 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
349 xpc_prot_vec[node]);
350 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
351 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
352 xpc_prot_vec[node]);
353 }
354 }
355 }
356}
357
358
359/*
360 * At periodic intervals, scan through all active partitions and ensure
361 * their heartbeat is still active. If not, the partition is deactivated.
362 */
363void
364xpc_check_remote_hb(void)
365{
366 struct xpc_vars *remote_vars;
367 struct xpc_partition *part;
368 partid_t partid;
369 bte_result_t bres;
370
371
372 remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
373
374 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
375 if (partid == sn_partition_id) {
376 continue;
377 }
378
379 part = &xpc_partitions[partid];
380
381 if (part->act_state == XPC_P_INACTIVE ||
382 part->act_state == XPC_P_DEACTIVATING) {
383 continue;
384 }
385
386 /* pull the remote_hb cache line */
387 bres = xp_bte_copy(part->remote_vars_pa,
388 ia64_tpa((u64) remote_vars),
389 XPC_VARS_ALIGNED_SIZE,
390 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
391 if (bres != BTE_SUCCESS) {
392 XPC_DEACTIVATE_PARTITION(part,
393 xpc_map_bte_errors(bres));
394 continue;
395 }
396
397 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
398 " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid,
399 remote_vars->heartbeat, part->last_heartbeat,
400 remote_vars->kdb_status,
401 remote_vars->heartbeating_to_mask);
402
403 if (((remote_vars->heartbeat == part->last_heartbeat) &&
404 (remote_vars->kdb_status == 0)) ||
405 !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
406
407 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
408 continue;
409 }
410
411 part->last_heartbeat = remote_vars->heartbeat;
412 }
413}
414
415
416/*
417 * Get a copy of the remote partition's rsvd page.
418 *
419 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
420 * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE.
421 */
422static enum xpc_retval
423xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
424 struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa)
425{
426 int bres, i;
427
428
429 /* get the reserved page's physical address */
430
431 *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
432 XPC_RSVD_PAGE_ALIGNED_SIZE);
433 if (*remote_rsvd_page_pa == 0) {
434 return xpcNoRsvdPageAddr;
435 }
436
437
438 /* pull over the reserved page structure */
439
440 bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp),
441 XPC_RSVD_PAGE_ALIGNED_SIZE,
442 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
443 if (bres != BTE_SUCCESS) {
444 return xpc_map_bte_errors(bres);
445 }
446
447
448 if (discovered_nasids != NULL) {
449 for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
450 discovered_nasids[i] |= remote_rp->part_nasids[i];
451 }
452 }
453
454
455 /* check that the partid is for another partition */
456
457 if (remote_rp->partid < 1 ||
458 remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
459 return xpcInvalidPartid;
460 }
461
462 if (remote_rp->partid == sn_partition_id) {
463 return xpcLocalPartid;
464 }
465
466
467 if (XPC_VERSION_MAJOR(remote_rp->version) !=
468 XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
469 return xpcBadVersion;
470 }
471
472 return xpcSuccess;
473}
474
475
476/*
477 * Get a copy of the remote partition's XPC variables.
478 *
479 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
480 * assumed to be of size XPC_VARS_ALIGNED_SIZE.
481 */
482static enum xpc_retval
483xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
484{
485 int bres;
486
487
488 if (remote_vars_pa == 0) {
489 return xpcVarsNotSet;
490 }
491
492
493 /* pull over the cross partition variables */
494
495 bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
496 XPC_VARS_ALIGNED_SIZE,
497 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
498 if (bres != BTE_SUCCESS) {
499 return xpc_map_bte_errors(bres);
500 }
501
502 if (XPC_VERSION_MAJOR(remote_vars->version) !=
503 XPC_VERSION_MAJOR(XPC_V_VERSION)) {
504 return xpcBadVersion;
505 }
506
507 return xpcSuccess;
508}
509
510
511/*
512 * Prior code has determine the nasid which generated an IPI. Inspect
513 * that nasid to determine if its partition needs to be activated or
514 * deactivated.
515 *
516 * A partition is consider "awaiting activation" if our partition
517 * flags indicate it is not active and it has a heartbeat. A
518 * partition is considered "awaiting deactivation" if our partition
519 * flags indicate it is active but it has no heartbeat or it is not
520 * sending its heartbeat to us.
521 *
522 * To determine the heartbeat, the remote nasid must have a properly
523 * initialized reserved page.
524 */
525static void
526xpc_identify_act_IRQ_req(int nasid)
527{
528 struct xpc_rsvd_page *remote_rp;
529 struct xpc_vars *remote_vars;
530 u64 remote_rsvd_page_pa;
531 u64 remote_vars_pa;
532 partid_t partid;
533 struct xpc_partition *part;
534 enum xpc_retval ret;
535
536
537 /* pull over the reserved page structure */
538
539 remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
540
541 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa);
542 if (ret != xpcSuccess) {
543 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
544 "which sent interrupt, reason=%d\n", nasid, ret);
545 return;
546 }
547
548 remote_vars_pa = remote_rp->vars_pa;
549 partid = remote_rp->partid;
550 part = &xpc_partitions[partid];
551
552
553 /* pull over the cross partition variables */
554
555 remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
556
557 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
558 if (ret != xpcSuccess) {
559
560 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
561 "which sent interrupt, reason=%d\n", nasid, ret);
562
563 XPC_DEACTIVATE_PARTITION(part, ret);
564 return;
565 }
566
567
568 part->act_IRQ_rcvd++;
569
570 dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
571 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
572 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
573
574
575 if (part->act_state == XPC_P_INACTIVE) {
576
577 part->remote_rp_pa = remote_rsvd_page_pa;
578 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n",
579 part->remote_rp_pa);
580
581 part->remote_vars_pa = remote_vars_pa;
582 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
583 part->remote_vars_pa);
584
585 part->last_heartbeat = remote_vars->heartbeat;
586 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
587 part->last_heartbeat);
588
589 part->remote_vars_part_pa = remote_vars->vars_part_pa;
590 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
591 part->remote_vars_part_pa);
592
593 part->remote_act_nasid = remote_vars->act_nasid;
594 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
595 part->remote_act_nasid);
596
597 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
598 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
599 part->remote_act_phys_cpuid);
600
601 part->remote_amos_page_pa = remote_vars->amos_page_pa;
602 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
603 part->remote_amos_page_pa);
604
605 xpc_activate_partition(part);
606
607 } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa ||
608 !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
609
610 part->reactivate_nasid = nasid;
611 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
612 }
613}
614
615
616/*
617 * Loop through the activation AMO variables and process any bits
618 * which are set. Each bit indicates a nasid sending a partition
619 * activation or deactivation request.
620 *
621 * Return #of IRQs detected.
622 */
623int
624xpc_identify_act_IRQ_sender(void)
625{
626 int word, bit;
627 u64 nasid_mask;
628 u64 nasid; /* remote nasid */
629 int n_IRQs_detected = 0;
630 AMO_t *act_amos;
631 struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
632
633
634 act_amos = xpc_vars->act_amos;
635
636
637 /* scan through act AMO variable looking for non-zero entries */
638 for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
639
640 nasid_mask = xpc_IPI_receive(&act_amos[word]);
641 if (nasid_mask == 0) {
642 /* no IRQs from nasids in this variable */
643 continue;
644 }
645
646 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
647 nasid_mask);
648
649
650 /*
651 * If this nasid has been added to the machine since
652 * our partition was reset, this will retain the
653 * remote nasid in our reserved pages machine mask.
654 * This is used in the event of module reload.
655 */
656 rp->mach_nasids[word] |= nasid_mask;
657
658
659 /* locate the nasid(s) which sent interrupts */
660
661 for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
662 if (nasid_mask & (1UL << bit)) {
663 n_IRQs_detected++;
664 nasid = XPC_NASID_FROM_W_B(word, bit);
665 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
666 nasid);
667 xpc_identify_act_IRQ_req(nasid);
668 }
669 }
670 }
671 return n_IRQs_detected;
672}
673
674
675/*
676 * Mark specified partition as active.
677 */
678enum xpc_retval
679xpc_mark_partition_active(struct xpc_partition *part)
680{
681 unsigned long irq_flags;
682 enum xpc_retval ret;
683
684
685 dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
686
687 spin_lock_irqsave(&part->act_lock, irq_flags);
688 if (part->act_state == XPC_P_ACTIVATING) {
689 part->act_state = XPC_P_ACTIVE;
690 ret = xpcSuccess;
691 } else {
692 DBUG_ON(part->reason == xpcSuccess);
693 ret = part->reason;
694 }
695 spin_unlock_irqrestore(&part->act_lock, irq_flags);
696
697 return ret;
698}
699
700
701/*
702 * Notify XPC that the partition is down.
703 */
704void
705xpc_deactivate_partition(const int line, struct xpc_partition *part,
706 enum xpc_retval reason)
707{
708 unsigned long irq_flags;
709 partid_t partid = XPC_PARTID(part);
710
711
712 spin_lock_irqsave(&part->act_lock, irq_flags);
713
714 if (part->act_state == XPC_P_INACTIVE) {
715 XPC_SET_REASON(part, reason, line);
716 spin_unlock_irqrestore(&part->act_lock, irq_flags);
717 if (reason == xpcReactivating) {
718 /* we interrupt ourselves to reactivate partition */
719 xpc_IPI_send_reactivate(part);
720 }
721 return;
722 }
723 if (part->act_state == XPC_P_DEACTIVATING) {
724 if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
725 reason == xpcReactivating) {
726 XPC_SET_REASON(part, reason, line);
727 }
728 spin_unlock_irqrestore(&part->act_lock, irq_flags);
729 return;
730 }
731
732 part->act_state = XPC_P_DEACTIVATING;
733 XPC_SET_REASON(part, reason, line);
734
735 spin_unlock_irqrestore(&part->act_lock, irq_flags);
736
737 XPC_DISALLOW_HB(partid, xpc_vars);
738
739 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
740 reason);
741
742 xpc_partition_down(part, reason);
743}
744
745
746/*
747 * Mark specified partition as active.
748 */
749void
750xpc_mark_partition_inactive(struct xpc_partition *part)
751{
752 unsigned long irq_flags;
753
754
755 dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
756 XPC_PARTID(part));
757
758 spin_lock_irqsave(&part->act_lock, irq_flags);
759 part->act_state = XPC_P_INACTIVE;
760 spin_unlock_irqrestore(&part->act_lock, irq_flags);
761 part->remote_rp_pa = 0;
762}
763
764
765/*
766 * SAL has provided a partition and machine mask. The partition mask
767 * contains a bit for each even nasid in our partition. The machine
768 * mask contains a bit for each even nasid in the entire machine.
769 *
770 * Using those two bit arrays, we can determine which nasids are
771 * known in the machine. Each should also have a reserved page
772 * initialized if they are available for partitioning.
773 */
774void
775xpc_discovery(void)
776{
777 void *remote_rp_base;
778 struct xpc_rsvd_page *remote_rp;
779 struct xpc_vars *remote_vars;
780 u64 remote_rsvd_page_pa;
781 u64 remote_vars_pa;
782 int region;
783 int max_regions;
784 int nasid;
785 struct xpc_rsvd_page *rp;
786 partid_t partid;
787 struct xpc_partition *part;
788 u64 *discovered_nasids;
789 enum xpc_retval ret;
790
791
792 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
793 GFP_KERNEL, &remote_rp_base);
794 if (remote_rp == NULL) {
795 return;
796 }
797 remote_vars = (struct xpc_vars *) remote_rp;
798
799
800 discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
801 GFP_KERNEL);
802 if (discovered_nasids == NULL) {
803 kfree(remote_rp_base);
804 return;
805 }
806 memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
807
808 rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
809
810 /*
811 * The term 'region' in this context refers to the minimum number of
812 * nodes that can comprise an access protection grouping. The access
813 * protection is in regards to memory, IOI and IPI.
814 */
815//>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
816//>>> include/asm-ia64/sn/addrs.h
817#define SH1_MAX_REGIONS 64
818#define SH2_MAX_REGIONS 256
819 max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
820
821 for (region = 0; region < max_regions; region++) {
822
823 if ((volatile int) xpc_exiting) {
824 break;
825 }
826
827 dev_dbg(xpc_part, "searching region %d\n", region);
828
829 for (nasid = (region * sn_region_size * 2);
830 nasid < ((region + 1) * sn_region_size * 2);
831 nasid += 2) {
832
833 if ((volatile int) xpc_exiting) {
834 break;
835 }
836
837 dev_dbg(xpc_part, "checking nasid %d\n", nasid);
838
839
840 if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
841 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
842 "part of the local partition; skipping "
843 "region\n", nasid);
844 break;
845 }
846
847 if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
848 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
849 "not on Numa-Link network at reset\n",
850 nasid);
851 continue;
852 }
853
854 if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
855 dev_dbg(xpc_part, "Nasid %d is part of a "
856 "partition which was previously "
857 "discovered\n", nasid);
858 continue;
859 }
860
861
862 /* pull over the reserved page structure */
863
864 ret = xpc_get_remote_rp(nasid, discovered_nasids,
865 remote_rp, &remote_rsvd_page_pa);
866 if (ret != xpcSuccess) {
867 dev_dbg(xpc_part, "unable to get reserved page "
868 "from nasid %d, reason=%d\n", nasid,
869 ret);
870
871 if (ret == xpcLocalPartid) {
872 break;
873 }
874 continue;
875 }
876
877 remote_vars_pa = remote_rp->vars_pa;
878
879 partid = remote_rp->partid;
880 part = &xpc_partitions[partid];
881
882
883 /* pull over the cross partition variables */
884
885 ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
886 if (ret != xpcSuccess) {
887 dev_dbg(xpc_part, "unable to get XPC variables "
888 "from nasid %d, reason=%d\n", nasid,
889 ret);
890
891 XPC_DEACTIVATE_PARTITION(part, ret);
892 continue;
893 }
894
895 if (part->act_state != XPC_P_INACTIVE) {
896 dev_dbg(xpc_part, "partition %d on nasid %d is "
897 "already activating\n", partid, nasid);
898 break;
899 }
900
901 /*
902 * Register the remote partition's AMOs with SAL so it
903 * can handle and cleanup errors within that address
904 * range should the remote partition go down. We don't
905 * unregister this range because it is difficult to
906 * tell when outstanding writes to the remote partition
907 * are finished and thus when it is thus safe to
908 * unregister. This should not result in wasted space
909 * in the SAL xp_addr_region table because we should
910 * get the same page for remote_act_amos_pa after
911 * module reloads and system reboots.
912 */
913 if (sn_register_xp_addr_region(
914 remote_vars->amos_page_pa,
915 PAGE_SIZE, 1) < 0) {
916 dev_dbg(xpc_part, "partition %d failed to "
917 "register xp_addr region 0x%016lx\n",
918 partid, remote_vars->amos_page_pa);
919
920 XPC_SET_REASON(part, xpcPhysAddrRegFailed,
921 __LINE__);
922 break;
923 }
924
925 /*
926 * The remote nasid is valid and available.
927 * Send an interrupt to that nasid to notify
928 * it that we are ready to begin activation.
929 */
930 dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
931 "nasid %d, phys_cpuid 0x%x\n",
932 remote_vars->amos_page_pa,
933 remote_vars->act_nasid,
934 remote_vars->act_phys_cpuid);
935
936 xpc_IPI_send_activate(remote_vars);
937 }
938 }
939
940 kfree(discovered_nasids);
941 kfree(remote_rp_base);
942}
943
944
945/*
946 * Given a partid, get the nasids owned by that partition from the
947 * remote partitions reserved page.
948 */
949enum xpc_retval
950xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
951{
952 struct xpc_partition *part;
953 u64 part_nasid_pa;
954 int bte_res;
955
956
957 part = &xpc_partitions[partid];
958 if (part->remote_rp_pa == 0) {
959 return xpcPartitionDown;
960 }
961
962 part_nasid_pa = part->remote_rp_pa +
963 (u64) &((struct xpc_rsvd_page *) 0)->part_nasids;
964
965 bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
966 L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
967 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
968
969 return xpc_map_bte_errors(bte_res);
970}
971