aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/sn/kernel
diff options
context:
space:
mode:
authorJeff Garzik <jgarzik@pobox.com>2005-10-29 17:49:12 -0400
committerJeff Garzik <jgarzik@pobox.com>2005-10-29 17:49:12 -0400
commitb0c4e148bd591629749d02a8fbc8d81c26d548cf (patch)
tree3e2142635f3dc2ceeae870ead2dceab7b9c6def1 /arch/ia64/sn/kernel
parent5615ca7906aefbdc3318604c89db5931d0a25910 (diff)
parentbe15cd72d256e5eb3261a781b8507fac83ab33f6 (diff)
Merge branch 'master'
Diffstat (limited to 'arch/ia64/sn/kernel')
-rw-r--r--arch/ia64/sn/kernel/bte.c2
-rw-r--r--arch/ia64/sn/kernel/io_init.c4
-rw-r--r--arch/ia64/sn/kernel/setup.c160
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c31
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c4
-rw-r--r--arch/ia64/sn/kernel/tiocx.c67
-rw-r--r--arch/ia64/sn/kernel/xpc.h366
-rw-r--r--arch/ia64/sn/kernel/xpc_channel.c329
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c330
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c475
10 files changed, 1240 insertions, 528 deletions
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index 45854c637e9c..d71f4de44f79 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -87,7 +87,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
87 unsigned long irq_flags; 87 unsigned long irq_flags;
88 unsigned long itc_end = 0; 88 unsigned long itc_end = 0;
89 int nasid_to_try[MAX_NODES_TO_TRY]; 89 int nasid_to_try[MAX_NODES_TO_TRY];
90 int my_nasid = get_nasid(); 90 int my_nasid = cpuid_to_nasid(raw_smp_processor_id());
91 int bte_if_index, nasid_index; 91 int bte_if_index, nasid_index;
92 int bte_first, btes_per_node = BTES_PER_NODE; 92 int bte_first, btes_per_node = BTES_PER_NODE;
93 93
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 906622d9f933..b4f5053f5e1b 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -22,8 +22,6 @@
22#include "xtalk/hubdev.h" 22#include "xtalk/hubdev.h"
23#include "xtalk/xwidgetdev.h" 23#include "xtalk/xwidgetdev.h"
24 24
25nasid_t master_nasid = INVALID_NASID; /* Partition Master */
26
27static struct list_head sn_sysdata_list; 25static struct list_head sn_sysdata_list;
28 26
29/* sysdata list struct */ 27/* sysdata list struct */
@@ -165,7 +163,7 @@ static void sn_fixup_ionodes(void)
165 * Get SGI Specific HUB chipset information. 163 * Get SGI Specific HUB chipset information.
166 * Inform Prom that this kernel can support domain bus numbering. 164 * Inform Prom that this kernel can support domain bus numbering.
167 */ 165 */
168 for (i = 0; i < numionodes; i++) { 166 for (i = 0; i < num_cnodes; i++) {
169 hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); 167 hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
170 nasid = cnodeid_to_nasid(i); 168 nasid = cnodeid_to_nasid(i);
171 hubdev->max_segment_number = 0xffffffff; 169 hubdev->max_segment_number = 0xffffffff;
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 6f8c5883716b..0fb579ef18c2 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -59,8 +59,6 @@ DEFINE_PER_CPU(struct pda_s, pda_percpu);
59 59
60#define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */ 60#define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */
61 61
62lboard_t *root_lboard[MAX_COMPACT_NODES];
63
64extern void bte_init_node(nodepda_t *, cnodeid_t); 62extern void bte_init_node(nodepda_t *, cnodeid_t);
65 63
66extern void sn_timer_init(void); 64extern void sn_timer_init(void);
@@ -97,15 +95,15 @@ u8 sn_region_size;
97EXPORT_SYMBOL(sn_region_size); 95EXPORT_SYMBOL(sn_region_size);
98int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */ 96int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
99 97
100short physical_node_map[MAX_PHYSNODE_ID]; 98short physical_node_map[MAX_NUMALINK_NODES];
101static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS]; 99static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
102 100
103EXPORT_SYMBOL(physical_node_map); 101EXPORT_SYMBOL(physical_node_map);
104 102
105int numionodes; 103int num_cnodes;
106 104
107static void sn_init_pdas(char **); 105static void sn_init_pdas(char **);
108static void scan_for_ionodes(void); 106static void build_cnode_tables(void);
109 107
110static nodepda_t *nodepdaindr[MAX_COMPACT_NODES]; 108static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
111 109
@@ -140,19 +138,6 @@ char drive_info[4 * 16];
140#endif 138#endif
141 139
142/* 140/*
143 * Get nasid of current cpu early in boot before nodepda is initialized
144 */
145static int
146boot_get_nasid(void)
147{
148 int nasid;
149
150 if (ia64_sn_get_sapic_info(get_sapicid(), &nasid, NULL, NULL))
151 BUG();
152 return nasid;
153}
154
155/*
156 * This routine can only be used during init, since 141 * This routine can only be used during init, since
157 * smp_boot_data is an init data structure. 142 * smp_boot_data is an init data structure.
158 * We have to use smp_boot_data.cpu_phys_id to find 143 * We have to use smp_boot_data.cpu_phys_id to find
@@ -223,7 +208,6 @@ void __init early_sn_setup(void)
223} 208}
224 209
225extern int platform_intr_list[]; 210extern int platform_intr_list[];
226extern nasid_t master_nasid;
227static int __initdata shub_1_1_found = 0; 211static int __initdata shub_1_1_found = 0;
228 212
229/* 213/*
@@ -269,7 +253,6 @@ static void __init sn_check_for_wars(void)
269void __init sn_setup(char **cmdline_p) 253void __init sn_setup(char **cmdline_p)
270{ 254{
271 long status, ticks_per_sec, drift; 255 long status, ticks_per_sec, drift;
272 int pxm;
273 u32 version = sn_sal_rev(); 256 u32 version = sn_sal_rev();
274 extern void sn_cpu_init(void); 257 extern void sn_cpu_init(void);
275 258
@@ -300,11 +283,10 @@ void __init sn_setup(char **cmdline_p)
300 283
301 MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY; 284 MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
302 285
303 memset(physical_node_map, -1, sizeof(physical_node_map)); 286 /*
304 for (pxm = 0; pxm < MAX_PXM_DOMAINS; pxm++) 287 * Build the tables for managing cnodes.
305 if (pxm_to_nid_map[pxm] != -1) 288 */
306 physical_node_map[pxm_to_nasid(pxm)] = 289 build_cnode_tables();
307 pxm_to_nid_map[pxm];
308 290
309 /* 291 /*
310 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard 292 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
@@ -319,8 +301,6 @@ void __init sn_setup(char **cmdline_p)
319 301
320 printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF); 302 printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
321 303
322 master_nasid = boot_get_nasid();
323
324 status = 304 status =
325 ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, 305 ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
326 &drift); 306 &drift);
@@ -378,15 +358,6 @@ static void __init sn_init_pdas(char **cmdline_p)
378{ 358{
379 cnodeid_t cnode; 359 cnodeid_t cnode;
380 360
381 memset(sn_cnodeid_to_nasid, -1,
382 sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
383 for_each_online_node(cnode)
384 sn_cnodeid_to_nasid[cnode] =
385 pxm_to_nasid(nid_to_pxm_map[cnode]);
386
387 numionodes = num_online_nodes();
388 scan_for_ionodes();
389
390 /* 361 /*
391 * Allocate & initalize the nodepda for each node. 362 * Allocate & initalize the nodepda for each node.
392 */ 363 */
@@ -402,7 +373,7 @@ static void __init sn_init_pdas(char **cmdline_p)
402 /* 373 /*
403 * Allocate & initialize nodepda for TIOs. For now, put them on node 0. 374 * Allocate & initialize nodepda for TIOs. For now, put them on node 0.
404 */ 375 */
405 for (cnode = num_online_nodes(); cnode < numionodes; cnode++) { 376 for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) {
406 nodepdaindr[cnode] = 377 nodepdaindr[cnode] =
407 alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); 378 alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
408 memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); 379 memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
@@ -411,7 +382,7 @@ static void __init sn_init_pdas(char **cmdline_p)
411 /* 382 /*
412 * Now copy the array of nodepda pointers to each nodepda. 383 * Now copy the array of nodepda pointers to each nodepda.
413 */ 384 */
414 for (cnode = 0; cnode < numionodes; cnode++) 385 for (cnode = 0; cnode < num_cnodes; cnode++)
415 memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr, 386 memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
416 sizeof(nodepdaindr)); 387 sizeof(nodepdaindr));
417 388
@@ -428,7 +399,7 @@ static void __init sn_init_pdas(char **cmdline_p)
428 * Initialize the per node hubdev. This includes IO Nodes and 399 * Initialize the per node hubdev. This includes IO Nodes and
429 * headless/memless nodes. 400 * headless/memless nodes.
430 */ 401 */
431 for (cnode = 0; cnode < numionodes; cnode++) { 402 for (cnode = 0; cnode < num_cnodes; cnode++) {
432 hubdev_init_node(nodepdaindr[cnode], cnode); 403 hubdev_init_node(nodepdaindr[cnode], cnode);
433 } 404 }
434} 405}
@@ -553,87 +524,58 @@ void __init sn_cpu_init(void)
553} 524}
554 525
555/* 526/*
556 * Scan klconfig for ionodes. Add the nasids to the 527 * Build tables for converting between NASIDs and cnodes.
557 * physical_node_map and the pda and increment numionodes.
558 */ 528 */
529static inline int __init board_needs_cnode(int type)
530{
531 return (type == KLTYPE_SNIA || type == KLTYPE_TIO);
532}
559 533
560static void __init scan_for_ionodes(void) 534void __init build_cnode_tables(void)
561{ 535{
562 int nasid = 0; 536 int nasid;
537 int node;
563 lboard_t *brd; 538 lboard_t *brd;
564 539
565 /* fakeprom does not support klgraph */ 540 memset(physical_node_map, -1, sizeof(physical_node_map));
566 if (IS_RUNNING_ON_FAKE_PROM()) 541 memset(sn_cnodeid_to_nasid, -1,
567 return; 542 sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
568
569 /* Setup ionodes with memory */
570 for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
571 char *klgraph_header;
572 cnodeid_t cnodeid;
573
574 if (physical_node_map[nasid] == -1)
575 continue;
576 543
577 cnodeid = -1; 544 /*
578 klgraph_header = __va(ia64_sn_get_klconfig_addr(nasid)); 545 * First populate the tables with C/M bricks. This ensures that
579 if (!klgraph_header) { 546 * cnode == node for all C & M bricks.
580 BUG(); /* All nodes must have klconfig tables! */ 547 */
581 } 548 for_each_online_node(node) {
582 cnodeid = nasid_to_cnodeid(nasid); 549 nasid = pxm_to_nasid(nid_to_pxm_map[node]);
583 root_lboard[cnodeid] = (lboard_t *) 550 sn_cnodeid_to_nasid[node] = nasid;
584 NODE_OFFSET_TO_LBOARD((nasid), 551 physical_node_map[nasid] = node;
585 ((kl_config_hdr_t
586 *) (klgraph_header))->
587 ch_board_info);
588 } 552 }
589 553
590 /* Scan headless/memless IO Nodes. */ 554 /*
591 for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { 555 * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node
592 /* if there's no nasid, don't try to read the klconfig on the node */ 556 * limit on the number of nodes, we can't use the generic node numbers
593 if (physical_node_map[nasid] == -1) 557 * for this. Note that num_cnodes is incremented below as TIOs or
594 continue; 558 * headless/memoryless nodes are discovered.
595 brd = find_lboard_any((lboard_t *) 559 */
596 root_lboard[nasid_to_cnodeid(nasid)], 560 num_cnodes = num_online_nodes();
597 KLTYPE_SNIA);
598 if (brd) {
599 brd = KLCF_NEXT_ANY(brd); /* Skip this node's lboard */
600 if (!brd)
601 continue;
602 }
603
604 brd = find_lboard_any(brd, KLTYPE_SNIA);
605 561
606 while (brd) { 562 /* fakeprom does not support klgraph */
607 sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; 563 if (IS_RUNNING_ON_FAKE_PROM())
608 physical_node_map[brd->brd_nasid] = numionodes; 564 return;
609 root_lboard[numionodes] = brd;
610 numionodes++;
611 brd = KLCF_NEXT_ANY(brd);
612 if (!brd)
613 break;
614
615 brd = find_lboard_any(brd, KLTYPE_SNIA);
616 }
617 }
618 565
619 /* Scan for TIO nodes. */ 566 /* Find TIOs & headless/memoryless nodes and add them to the tables */
620 for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) { 567 for_each_online_node(node) {
621 /* if there's no nasid, don't try to read the klconfig on the node */ 568 kl_config_hdr_t *klgraph_header;
622 if (physical_node_map[nasid] == -1) 569 nasid = cnodeid_to_nasid(node);
623 continue; 570 if ((klgraph_header = ia64_sn_get_klconfig_addr(nasid)) == NULL)
624 brd = find_lboard_any((lboard_t *) 571 BUG();
625 root_lboard[nasid_to_cnodeid(nasid)], 572 brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
626 KLTYPE_TIO);
627 while (brd) { 573 while (brd) {
628 sn_cnodeid_to_nasid[numionodes] = brd->brd_nasid; 574 if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
629 physical_node_map[brd->brd_nasid] = numionodes; 575 sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid;
630 root_lboard[numionodes] = brd; 576 physical_node_map[brd->brd_nasid] = num_cnodes++;
631 numionodes++; 577 }
632 brd = KLCF_NEXT_ANY(brd); 578 brd = find_lboard_next(brd);
633 if (!brd)
634 break;
635
636 brd = find_lboard_any(brd, KLTYPE_TIO);
637 } 579 }
638 } 580 }
639} 581}
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 0a4ee50c302f..49b530c39a42 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -177,6 +177,7 @@ void sn_tlb_migrate_finish(struct mm_struct *mm)
177 177
178/** 178/**
179 * sn2_global_tlb_purge - globally purge translation cache of virtual address range 179 * sn2_global_tlb_purge - globally purge translation cache of virtual address range
180 * @mm: mm_struct containing virtual address range
180 * @start: start of virtual address range 181 * @start: start of virtual address range
181 * @end: end of virtual address range 182 * @end: end of virtual address range
182 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc)) 183 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
@@ -188,21 +189,22 @@ void sn_tlb_migrate_finish(struct mm_struct *mm)
188 * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context. 189 * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
189 * - cpu_vm_mask is converted into a nodemask of the nodes containing the 190 * - cpu_vm_mask is converted into a nodemask of the nodes containing the
190 * cpus in cpu_vm_mask. 191 * cpus in cpu_vm_mask.
191 * - if only one bit is set in cpu_vm_mask & it is the current cpu, 192 * - if only one bit is set in cpu_vm_mask & it is the current cpu & the
192 * then only the local TLB needs to be flushed. This flushing can be done 193 * process is purging its own virtual address range, then only the
193 * using ptc.l. This is the common case & avoids the global spinlock. 194 * local TLB needs to be flushed. This flushing can be done using
195 * ptc.l. This is the common case & avoids the global spinlock.
194 * - if multiple cpus have loaded the context, then flushing has to be 196 * - if multiple cpus have loaded the context, then flushing has to be
195 * done with ptc.g/MMRs under protection of the global ptc_lock. 197 * done with ptc.g/MMRs under protection of the global ptc_lock.
196 */ 198 */
197 199
198void 200void
199sn2_global_tlb_purge(unsigned long start, unsigned long end, 201sn2_global_tlb_purge(struct mm_struct *mm, unsigned long start,
200 unsigned long nbits) 202 unsigned long end, unsigned long nbits)
201{ 203{
202 int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; 204 int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
205 int mymm = (mm == current->active_mm);
203 volatile unsigned long *ptc0, *ptc1; 206 volatile unsigned long *ptc0, *ptc1;
204 unsigned long itc, itc2, flags, data0 = 0, data1 = 0; 207 unsigned long itc, itc2, flags, data0 = 0, data1 = 0, rr_value;
205 struct mm_struct *mm = current->active_mm;
206 short nasids[MAX_NUMNODES], nix; 208 short nasids[MAX_NUMNODES], nix;
207 nodemask_t nodes_flushed; 209 nodemask_t nodes_flushed;
208 210
@@ -216,9 +218,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
216 i++; 218 i++;
217 } 219 }
218 220
221 if (i == 0)
222 return;
223
219 preempt_disable(); 224 preempt_disable();
220 225
221 if (likely(i == 1 && lcpu == smp_processor_id())) { 226 if (likely(i == 1 && lcpu == smp_processor_id() && mymm)) {
222 do { 227 do {
223 ia64_ptcl(start, nbits << 2); 228 ia64_ptcl(start, nbits << 2);
224 start += (1UL << nbits); 229 start += (1UL << nbits);
@@ -229,7 +234,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
229 return; 234 return;
230 } 235 }
231 236
232 if (atomic_read(&mm->mm_users) == 1) { 237 if (atomic_read(&mm->mm_users) == 1 && mymm) {
233 flush_tlb_mm(mm); 238 flush_tlb_mm(mm);
234 __get_cpu_var(ptcstats).change_rid++; 239 __get_cpu_var(ptcstats).change_rid++;
235 preempt_enable(); 240 preempt_enable();
@@ -241,11 +246,13 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
241 for_each_node_mask(cnode, nodes_flushed) 246 for_each_node_mask(cnode, nodes_flushed)
242 nasids[nix++] = cnodeid_to_nasid(cnode); 247 nasids[nix++] = cnodeid_to_nasid(cnode);
243 248
249 rr_value = (mm->context << 3) | REGION_NUMBER(start);
250
244 shub1 = is_shub1(); 251 shub1 = is_shub1();
245 if (shub1) { 252 if (shub1) {
246 data0 = (1UL << SH1_PTC_0_A_SHFT) | 253 data0 = (1UL << SH1_PTC_0_A_SHFT) |
247 (nbits << SH1_PTC_0_PS_SHFT) | 254 (nbits << SH1_PTC_0_PS_SHFT) |
248 ((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) | 255 (rr_value << SH1_PTC_0_RID_SHFT) |
249 (1UL << SH1_PTC_0_START_SHFT); 256 (1UL << SH1_PTC_0_START_SHFT);
250 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0); 257 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
251 ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1); 258 ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
@@ -254,7 +261,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
254 (nbits << SH2_PTC_PS_SHFT) | 261 (nbits << SH2_PTC_PS_SHFT) |
255 (1UL << SH2_PTC_START_SHFT); 262 (1UL << SH2_PTC_START_SHFT);
256 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC + 263 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
257 ((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) ); 264 (rr_value << SH2_PTC_RID_SHFT));
258 ptc1 = NULL; 265 ptc1 = NULL;
259 } 266 }
260 267
@@ -275,7 +282,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
275 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); 282 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
276 for (i = 0; i < nix; i++) { 283 for (i = 0; i < nix; i++) {
277 nasid = nasids[i]; 284 nasid = nasids[i];
278 if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) { 285 if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid && mymm)) {
279 ia64_ptcga(start, nbits << 2); 286 ia64_ptcga(start, nbits << 2);
280 ia64_srlz_i(); 287 ia64_srlz_i();
281 } else { 288 } else {
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 0513aacac8c1..6c6fbca3229c 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -476,8 +476,8 @@ static int sn_topology_show(struct seq_file *s, void *d)
476 for_each_online_cpu(j) { 476 for_each_online_cpu(j) {
477 seq_printf(s, j ? ":%d" : ", dist %d", 477 seq_printf(s, j ? ":%d" : ", dist %d",
478 node_distance( 478 node_distance(
479 cpuid_to_cnodeid(i), 479 cpu_to_node(i),
480 cpuid_to_cnodeid(j))); 480 cpu_to_node(j)));
481 } 481 }
482 seq_putc(s, '\n'); 482 seq_putc(s, '\n');
483 } 483 }
diff --git a/arch/ia64/sn/kernel/tiocx.c b/arch/ia64/sn/kernel/tiocx.c
index b45db5133f55..0d8592a745a7 100644
--- a/arch/ia64/sn/kernel/tiocx.c
+++ b/arch/ia64/sn/kernel/tiocx.c
@@ -183,11 +183,12 @@ int cx_driver_unregister(struct cx_drv *cx_driver)
183 * @part_num: device's part number 183 * @part_num: device's part number
184 * @mfg_num: device's manufacturer number 184 * @mfg_num: device's manufacturer number
185 * @hubdev: hub info associated with this device 185 * @hubdev: hub info associated with this device
186 * @bt: board type of the device
186 * 187 *
187 */ 188 */
188int 189int
189cx_device_register(nasid_t nasid, int part_num, int mfg_num, 190cx_device_register(nasid_t nasid, int part_num, int mfg_num,
190 struct hubdev_info *hubdev) 191 struct hubdev_info *hubdev, int bt)
191{ 192{
192 struct cx_dev *cx_dev; 193 struct cx_dev *cx_dev;
193 194
@@ -200,6 +201,7 @@ cx_device_register(nasid_t nasid, int part_num, int mfg_num,
200 cx_dev->cx_id.mfg_num = mfg_num; 201 cx_dev->cx_id.mfg_num = mfg_num;
201 cx_dev->cx_id.nasid = nasid; 202 cx_dev->cx_id.nasid = nasid;
202 cx_dev->hubdev = hubdev; 203 cx_dev->hubdev = hubdev;
204 cx_dev->bt = bt;
203 205
204 cx_dev->dev.parent = NULL; 206 cx_dev->dev.parent = NULL;
205 cx_dev->dev.bus = &tiocx_bus_type; 207 cx_dev->dev.bus = &tiocx_bus_type;
@@ -238,7 +240,8 @@ static int cx_device_reload(struct cx_dev *cx_dev)
238{ 240{
239 cx_device_unregister(cx_dev); 241 cx_device_unregister(cx_dev);
240 return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num, 242 return cx_device_register(cx_dev->cx_id.nasid, cx_dev->cx_id.part_num,
241 cx_dev->cx_id.mfg_num, cx_dev->hubdev); 243 cx_dev->cx_id.mfg_num, cx_dev->hubdev,
244 cx_dev->bt);
242} 245}
243 246
244static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget, 247static inline uint64_t tiocx_intr_alloc(nasid_t nasid, int widget,
@@ -365,26 +368,20 @@ static void tio_corelet_reset(nasid_t nasid, int corelet)
365 udelay(2000); 368 udelay(2000);
366} 369}
367 370
368static int tiocx_btchar_get(int nasid) 371static int is_fpga_tio(int nasid, int *bt)
369{ 372{
370 moduleid_t module_id; 373 int ioboard_type;
371 geoid_t geoid;
372 int cnodeid;
373
374 cnodeid = nasid_to_cnodeid(nasid);
375 geoid = cnodeid_get_geoid(cnodeid);
376 module_id = geo_module(geoid);
377 return MODULE_GET_BTCHAR(module_id);
378}
379 374
380static int is_fpga_brick(int nasid) 375 ioboard_type = ia64_sn_sysctl_ioboard_get(nasid);
381{ 376
382 switch (tiocx_btchar_get(nasid)) { 377 switch (ioboard_type) {
383 case L1_BRICKTYPE_SA: 378 case L1_BRICKTYPE_SA:
384 case L1_BRICKTYPE_ATHENA: 379 case L1_BRICKTYPE_ATHENA:
385 case L1_BRICKTYPE_DAYTONA: 380 case L1_BOARDTYPE_DAYTONA:
381 *bt = ioboard_type;
386 return 1; 382 return 1;
387 } 383 }
384
388 return 0; 385 return 0;
389} 386}
390 387
@@ -407,16 +404,22 @@ static int tiocx_reload(struct cx_dev *cx_dev)
407 404
408 if (bitstream_loaded(nasid)) { 405 if (bitstream_loaded(nasid)) {
409 uint64_t cx_id; 406 uint64_t cx_id;
410 407 int rv;
411 cx_id = 408
412 *(volatile uint64_t *)(TIO_SWIN_BASE(nasid, TIOCX_CORELET) + 409 rv = ia64_sn_sysctl_tio_clock_reset(nasid);
410 if (rv) {
411 printk(KERN_ALERT "CX port JTAG reset failed.\n");
412 } else {
413 cx_id = *(volatile uint64_t *)
414 (TIO_SWIN_BASE(nasid, TIOCX_CORELET) +
413 WIDGET_ID); 415 WIDGET_ID);
414 part_num = XWIDGET_PART_NUM(cx_id); 416 part_num = XWIDGET_PART_NUM(cx_id);
415 mfg_num = XWIDGET_MFG_NUM(cx_id); 417 mfg_num = XWIDGET_MFG_NUM(cx_id);
416 DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num); 418 DBG("part= 0x%x, mfg= 0x%x\n", part_num, mfg_num);
417 /* just ignore it if it's a CE */ 419 /* just ignore it if it's a CE */
418 if (part_num == TIO_CE_ASIC_PARTNUM) 420 if (part_num == TIO_CE_ASIC_PARTNUM)
419 return 0; 421 return 0;
422 }
420 } 423 }
421 424
422 cx_dev->cx_id.part_num = part_num; 425 cx_dev->cx_id.part_num = part_num;
@@ -436,10 +439,10 @@ static ssize_t show_cxdev_control(struct device *dev, struct device_attribute *a
436{ 439{
437 struct cx_dev *cx_dev = to_cx_dev(dev); 440 struct cx_dev *cx_dev = to_cx_dev(dev);
438 441
439 return sprintf(buf, "0x%x 0x%x 0x%x %d\n", 442 return sprintf(buf, "0x%x 0x%x 0x%x 0x%x\n",
440 cx_dev->cx_id.nasid, 443 cx_dev->cx_id.nasid,
441 cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num, 444 cx_dev->cx_id.part_num, cx_dev->cx_id.mfg_num,
442 tiocx_btchar_get(cx_dev->cx_id.nasid)); 445 cx_dev->bt);
443} 446}
444 447
445static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf, 448static ssize_t store_cxdev_control(struct device *dev, struct device_attribute *attr, const char *buf,
@@ -486,13 +489,13 @@ static int __init tiocx_init(void)
486 489
487 bus_register(&tiocx_bus_type); 490 bus_register(&tiocx_bus_type);
488 491
489 for (cnodeid = 0; cnodeid < MAX_COMPACT_NODES; cnodeid++) { 492 for (cnodeid = 0; cnodeid < num_cnodes; cnodeid++) {
490 nasid_t nasid; 493 nasid_t nasid;
494 int bt;
491 495
492 if ((nasid = cnodeid_to_nasid(cnodeid)) < 0) 496 nasid = cnodeid_to_nasid(cnodeid);
493 break; /* No more nasids .. bail out of loop */
494 497
495 if ((nasid & 0x1) && is_fpga_brick(nasid)) { 498 if ((nasid & 0x1) && is_fpga_tio(nasid, &bt)) {
496 struct hubdev_info *hubdev; 499 struct hubdev_info *hubdev;
497 struct xwidget_info *widgetp; 500 struct xwidget_info *widgetp;
498 501
@@ -512,7 +515,7 @@ static int __init tiocx_init(void)
512 515
513 if (cx_device_register 516 if (cx_device_register
514 (nasid, widgetp->xwi_hwid.part_num, 517 (nasid, widgetp->xwi_hwid.part_num,
515 widgetp->xwi_hwid.mfg_num, hubdev) < 0) 518 widgetp->xwi_hwid.mfg_num, hubdev, bt) < 0)
516 return -ENXIO; 519 return -ENXIO;
517 else 520 else
518 found_tiocx_device++; 521 found_tiocx_device++;
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index e5f5a4e51f70..fbcedc7c27fa 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -57,7 +57,7 @@
57#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2) 57#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
58 58
59#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ 59#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
60#define XPC_HB_CHECK_DEFAULT_TIMEOUT 20 /* check HB every x secs */ 60#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
61 61
62/* define the process name of HB checker and the CPU it is pinned to */ 62/* define the process name of HB checker and the CPU it is pinned to */
63#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" 63#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
@@ -67,34 +67,82 @@
67#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" 67#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
68 68
69 69
70#define XPC_HB_ALLOWED(_p, _v) ((_v)->heartbeating_to_mask & (1UL << (_p)))
71#define XPC_ALLOW_HB(_p, _v) (_v)->heartbeating_to_mask |= (1UL << (_p))
72#define XPC_DISALLOW_HB(_p, _v) (_v)->heartbeating_to_mask &= (~(1UL << (_p)))
73
74
75/* 70/*
76 * Reserved Page provided by SAL. 71 * the reserved page
72 *
73 * SAL reserves one page of memory per partition for XPC. Though a full page
74 * in length (16384 bytes), its starting address is not page aligned, but it
75 * is cacheline aligned. The reserved page consists of the following:
76 *
77 * reserved page header
78 *
79 * The first cacheline of the reserved page contains the header
80 * (struct xpc_rsvd_page). Before SAL initialization has completed,
81 * SAL has set up the following fields of the reserved page header:
82 * SAL_signature, SAL_version, partid, and nasids_size. The other
83 * fields are set up by XPC. (xpc_rsvd_page points to the local
84 * partition's reserved page.)
77 * 85 *
78 * SAL provides one page per partition of reserved memory. When SAL 86 * part_nasids mask
79 * initialization is complete, SAL_signature, SAL_version, partid, 87 * mach_nasids mask
80 * part_nasids, and mach_nasids are set. 88 *
89 * SAL also sets up two bitmaps (or masks), one that reflects the actual
90 * nasids in this partition (part_nasids), and the other that reflects
91 * the actual nasids in the entire machine (mach_nasids). We're only
92 * interested in the even numbered nasids (which contain the processors
93 * and/or memory), so we only need half as many bits to represent the
94 * nasids. The part_nasids mask is located starting at the first cacheline
95 * following the reserved page header. The mach_nasids mask follows right
96 * after the part_nasids mask. The size in bytes of each mask is reflected
97 * by the reserved page header field 'nasids_size'. (Local partition's
98 * mask pointers are xpc_part_nasids and xpc_mach_nasids.)
99 *
100 * vars
101 * vars part
102 *
103 * Immediately following the mach_nasids mask are the XPC variables
104 * required by other partitions. First are those that are generic to all
105 * partitions (vars), followed on the next available cacheline by those
 106 * which are partition specific (vars part). These are set up by XPC.
107 * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
81 * 108 *
82 * Note: Until vars_pa is set, the partition XPC code has not been initialized. 109 * Note: Until vars_pa is set, the partition XPC code has not been initialized.
83 */ 110 */
84struct xpc_rsvd_page { 111struct xpc_rsvd_page {
85 u64 SAL_signature; /* SAL unique signature */ 112 u64 SAL_signature; /* SAL: unique signature */
86 u64 SAL_version; /* SAL specified version */ 113 u64 SAL_version; /* SAL: version */
87 u8 partid; /* partition ID from SAL */ 114 u8 partid; /* SAL: partition ID */
88 u8 version; 115 u8 version;
89 u8 pad[6]; /* pad to u64 align */ 116 u8 pad1[6]; /* align to next u64 in cacheline */
90 volatile u64 vars_pa; 117 volatile u64 vars_pa;
91 u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; 118 struct timespec stamp; /* time when reserved page was setup by XPC */
92 u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; 119 u64 pad2[9]; /* align to last u64 in cacheline */
120 u64 nasids_size; /* SAL: size of each nasid mask in bytes */
93}; 121};
94#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */
95 122
96#define XPC_RSVD_PAGE_ALIGNED_SIZE \ 123#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
97 (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))) 124
125#define XPC_SUPPORTS_RP_STAMP(_version) \
126 (_version >= _XPC_VERSION(1,1))
127
/*
 * Compare two reserved page timestamps.
 *
 * @stamp1: first timestamp (not modified)
 * @stamp2: second timestamp (not modified)
 *
 * The return value is:
 *
 *	< 0, if stamp1 < stamp2
 *	= 0, if stamp1 == stamp2
 *	> 0, if stamp1 > stamp2
 *
 * The fields are compared instead of subtracted. tv_sec and tv_nsec are
 * wider than int, so squeezing their difference into an int can flip the
 * sign (difference of 2^31 seconds) or lose it entirely (difference of
 * 2^32 seconds would compare as equal).
 */
static inline int
xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
{
	/* seconds dominate; only look at nanoseconds when they are equal */
	if (stamp1->tv_sec != stamp2->tv_sec)
		return (stamp1->tv_sec < stamp2->tv_sec) ? -1 : 1;

	if (stamp1->tv_nsec != stamp2->tv_nsec)
		return (stamp1->tv_nsec < stamp2->tv_nsec) ? -1 : 1;

	return 0;
}
98 146
99 147
100/* 148/*
@@ -121,11 +169,58 @@ struct xpc_vars {
121 u64 vars_part_pa; 169 u64 vars_part_pa;
122 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */ 170 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
123 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */ 171 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
124 AMO_t *act_amos; /* pointer to the first activation AMO */
125}; 172};
126#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */
127 173
128#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars))) 174#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
175
176#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
177 (_version >= _XPC_VERSION(3,1))
178
179
180static inline int
181xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
182{
183 return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
184}
185
186static inline void
187xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
188{
189 u64 old_mask, new_mask;
190
191 do {
192 old_mask = vars->heartbeating_to_mask;
193 new_mask = (old_mask | (1UL << partid));
194 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
195 old_mask);
196}
197
198static inline void
199xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
200{
201 u64 old_mask, new_mask;
202
203 do {
204 old_mask = vars->heartbeating_to_mask;
205 new_mask = (old_mask & ~(1UL << partid));
206 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
207 old_mask);
208}
209
210
211/*
212 * The AMOs page consists of a number of AMO variables which are divided into
213 * four groups. The first two groups are used to identify an IRQ's sender.
214 * These two groups consist of 64 and 128 AMO variables respectively. The last
215 * two groups, consisting of just one AMO variable each, are used to identify
216 * the remote partitions that are currently engaged (from the viewpoint of
217 * the XPC running on the remote partition).
218 */
219#define XPC_NOTIFY_IRQ_AMOS 0
220#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
221#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
222#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
223
129 224
130/* 225/*
131 * The following structure describes the per partition specific variables. 226 * The following structure describes the per partition specific variables.
@@ -165,6 +260,16 @@ struct xpc_vars_part {
165#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */ 260#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
166 261
167 262
263/* the reserved page sizes and offsets */
264
/* cacheline aligned sizes of the reserved page header and of the vars */
#define XPC_RP_HEADER_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
#define XPC_RP_VARS_SIZE	L1_CACHE_ALIGN(sizeof(struct xpc_vars))

/*
 * Locate the areas that follow the reserved page header, in this order:
 * xp_nasid_mask_words u64 words of part nasids, another xp_nasid_mask_words
 * u64 words of mach nasids, the vars, and then the vars part.
 *
 * The nasid offsets must be added while the pointer is still a (u64 *), so
 * the cast to (struct xpc_vars *) is applied only after the addition; doing
 * it the other way around would scale the offset by sizeof(struct xpc_vars).
 * Every macro is fully parenthesized and uses only its own _rp argument.
 */
#define XPC_RP_PART_NASIDS(_rp)	((u64 *) ((u8 *) (_rp) + XPC_RP_HEADER_SIZE))
#define XPC_RP_MACH_NASIDS(_rp)	(XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
#define XPC_RP_VARS(_rp)	((struct xpc_vars *) \
				 (XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words))
#define XPC_RP_VARS_PART(_rp)	((struct xpc_vars_part *) \
				 ((u8 *) XPC_RP_VARS(_rp) + XPC_RP_VARS_SIZE))
272
168 273
169/* 274/*
170 * Functions registered by add_timer() or called by kernel_thread() only 275 * Functions registered by add_timer() or called by kernel_thread() only
@@ -349,6 +454,9 @@ struct xpc_channel {
349 atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */ 454 atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
350 wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */ 455 wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
351 456
457 u8 delayed_IPI_flags; /* IPI flags received, but delayed */
458 /* action until channel disconnected */
459
352 /* queue of msg senders who want to be notified when msg received */ 460 /* queue of msg senders who want to be notified when msg received */
353 461
354 atomic_t n_to_notify; /* #of msg senders to notify */ 462 atomic_t n_to_notify; /* #of msg senders to notify */
@@ -358,7 +466,7 @@ struct xpc_channel {
358 void *key; /* pointer to user's key */ 466 void *key; /* pointer to user's key */
359 467
360 struct semaphore msg_to_pull_sema; /* next msg to pull serialization */ 468 struct semaphore msg_to_pull_sema; /* next msg to pull serialization */
361 struct semaphore teardown_sema; /* wait for teardown completion */ 469 struct semaphore wdisconnect_sema; /* wait for channel disconnect */
362 470
363 struct xpc_openclose_args *local_openclose_args; /* args passed on */ 471 struct xpc_openclose_args *local_openclose_args; /* args passed on */
364 /* opening or closing of channel */ 472 /* opening or closing of channel */
@@ -410,6 +518,8 @@ struct xpc_channel {
410 518
411#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ 519#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */
412#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ 520#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */
521#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */
522#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */
413 523
414 524
415 525
@@ -422,6 +532,8 @@ struct xpc_partition {
422 532
423 /* XPC HB infrastructure */ 533 /* XPC HB infrastructure */
424 534
535 u8 remote_rp_version; /* version# of partition's rsvd pg */
536 struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */
425 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ 537 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
426 u64 remote_vars_pa; /* phys addr of partition's vars */ 538 u64 remote_vars_pa; /* phys addr of partition's vars */
427 u64 remote_vars_part_pa; /* phys addr of partition's vars part */ 539 u64 remote_vars_part_pa; /* phys addr of partition's vars part */
@@ -432,14 +544,18 @@ struct xpc_partition {
432 u32 act_IRQ_rcvd; /* IRQs since activation */ 544 u32 act_IRQ_rcvd; /* IRQs since activation */
433 spinlock_t act_lock; /* protect updating of act_state */ 545 spinlock_t act_lock; /* protect updating of act_state */
434 u8 act_state; /* from XPC HB viewpoint */ 546 u8 act_state; /* from XPC HB viewpoint */
547 u8 remote_vars_version; /* version# of partition's vars */
435 enum xpc_retval reason; /* reason partition is deactivating */ 548 enum xpc_retval reason; /* reason partition is deactivating */
436 int reason_line; /* line# deactivation initiated from */ 549 int reason_line; /* line# deactivation initiated from */
437 int reactivate_nasid; /* nasid in partition to reactivate */ 550 int reactivate_nasid; /* nasid in partition to reactivate */
438 551
552 unsigned long disengage_request_timeout; /* timeout in jiffies */
553 struct timer_list disengage_request_timer;
554
439 555
440 /* XPC infrastructure referencing and teardown control */ 556 /* XPC infrastructure referencing and teardown control */
441 557
442 volatile u8 setup_state; /* infrastructure setup state */ 558 volatile u8 setup_state; /* infrastructure setup state */
443 wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */ 559 wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
444 atomic_t references; /* #of references to infrastructure */ 560 atomic_t references; /* #of references to infrastructure */
445 561
@@ -454,6 +570,7 @@ struct xpc_partition {
454 570
455 u8 nchannels; /* #of defined channels supported */ 571 u8 nchannels; /* #of defined channels supported */
456 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ 572 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
573 atomic_t nchannels_engaged;/* #of channels engaged with remote part */
457 struct xpc_channel *channels;/* array of channel structures */ 574 struct xpc_channel *channels;/* array of channel structures */
458 575
459 void *local_GPs_base; /* base address of kmalloc'd space */ 576 void *local_GPs_base; /* base address of kmalloc'd space */
@@ -518,6 +635,7 @@ struct xpc_partition {
518#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */ 635#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
519 636
520 637
638
521/* 639/*
522 * struct xpc_partition IPI_timer #of seconds to wait before checking for 640 * struct xpc_partition IPI_timer #of seconds to wait before checking for
523 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until 641 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
@@ -526,6 +644,13 @@ struct xpc_partition {
526#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) 644#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
527 645
528 646
647/* number of seconds to wait for other partitions to disengage */
648#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90
649
650/* interval in seconds to print 'waiting disengagement' messages */
651#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
652
653
529#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0])) 654#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
530 655
531 656
@@ -534,24 +659,20 @@ struct xpc_partition {
534extern struct xpc_registration xpc_registrations[]; 659extern struct xpc_registration xpc_registrations[];
535 660
536 661
537/* >>> found in xpc_main.c only */ 662/* found in xpc_main.c */
538extern struct device *xpc_part; 663extern struct device *xpc_part;
539extern struct device *xpc_chan; 664extern struct device *xpc_chan;
665extern int xpc_disengage_request_timelimit;
540extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *); 666extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *);
541extern void xpc_dropped_IPI_check(struct xpc_partition *); 667extern void xpc_dropped_IPI_check(struct xpc_partition *);
668extern void xpc_activate_partition(struct xpc_partition *);
542extern void xpc_activate_kthreads(struct xpc_channel *, int); 669extern void xpc_activate_kthreads(struct xpc_channel *, int);
543extern void xpc_create_kthreads(struct xpc_channel *, int); 670extern void xpc_create_kthreads(struct xpc_channel *, int);
544extern void xpc_disconnect_wait(int); 671extern void xpc_disconnect_wait(int);
545 672
546 673
547/* found in xpc_main.c and efi-xpc.c */
548extern void xpc_activate_partition(struct xpc_partition *);
549
550
551/* found in xpc_partition.c */ 674/* found in xpc_partition.c */
552extern int xpc_exiting; 675extern int xpc_exiting;
553extern int xpc_hb_interval;
554extern int xpc_hb_check_interval;
555extern struct xpc_vars *xpc_vars; 676extern struct xpc_vars *xpc_vars;
556extern struct xpc_rsvd_page *xpc_rsvd_page; 677extern struct xpc_rsvd_page *xpc_rsvd_page;
557extern struct xpc_vars_part *xpc_vars_part; 678extern struct xpc_vars_part *xpc_vars_part;
@@ -561,6 +682,7 @@ extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
561extern void xpc_allow_IPI_ops(void); 682extern void xpc_allow_IPI_ops(void);
562extern void xpc_restrict_IPI_ops(void); 683extern void xpc_restrict_IPI_ops(void);
563extern int xpc_identify_act_IRQ_sender(void); 684extern int xpc_identify_act_IRQ_sender(void);
685extern int xpc_partition_disengaged(struct xpc_partition *);
564extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *); 686extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
565extern void xpc_mark_partition_inactive(struct xpc_partition *); 687extern void xpc_mark_partition_inactive(struct xpc_partition *);
566extern void xpc_discovery(void); 688extern void xpc_discovery(void);
@@ -585,8 +707,8 @@ extern void xpc_connected_callout(struct xpc_channel *);
585extern void xpc_deliver_msg(struct xpc_channel *); 707extern void xpc_deliver_msg(struct xpc_channel *);
586extern void xpc_disconnect_channel(const int, struct xpc_channel *, 708extern void xpc_disconnect_channel(const int, struct xpc_channel *,
587 enum xpc_retval, unsigned long *); 709 enum xpc_retval, unsigned long *);
588extern void xpc_disconnected_callout(struct xpc_channel *); 710extern void xpc_disconnecting_callout(struct xpc_channel *);
589extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval); 711extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
590extern void xpc_teardown_infrastructure(struct xpc_partition *); 712extern void xpc_teardown_infrastructure(struct xpc_partition *);
591 713
592 714
@@ -674,6 +796,157 @@ xpc_part_ref(struct xpc_partition *part)
674 796
675 797
676/* 798/*
799 * This next set of inlines are used to keep track of when a partition is
800 * potentially engaged in accessing memory belonging to another partition.
801 */
802
803static inline void
804xpc_mark_partition_engaged(struct xpc_partition *part)
805{
806 unsigned long irq_flags;
807 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
808 (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
809
810
811 local_irq_save(irq_flags);
812
813 /* set bit corresponding to our partid in remote partition's AMO */
814 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
815 (1UL << sn_partition_id));
816 /*
817 * We must always use the nofault function regardless of whether we
818 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
819 * didn't, we'd never know that the other partition is down and would
820 * keep sending IPIs and AMOs to it until the heartbeat times out.
821 */
822 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
823 variable), xp_nofault_PIOR_target));
824
825 local_irq_restore(irq_flags);
826}
827
828static inline void
829xpc_mark_partition_disengaged(struct xpc_partition *part)
830{
831 unsigned long irq_flags;
832 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
833 (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
834
835
836 local_irq_save(irq_flags);
837
838 /* clear bit corresponding to our partid in remote partition's AMO */
839 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
840 ~(1UL << sn_partition_id));
841 /*
842 * We must always use the nofault function regardless of whether we
843 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
844 * didn't, we'd never know that the other partition is down and would
845 * keep sending IPIs and AMOs to it until the heartbeat times out.
846 */
847 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
848 variable), xp_nofault_PIOR_target));
849
850 local_irq_restore(irq_flags);
851}
852
853static inline void
854xpc_request_partition_disengage(struct xpc_partition *part)
855{
856 unsigned long irq_flags;
857 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
858 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
859
860
861 local_irq_save(irq_flags);
862
863 /* set bit corresponding to our partid in remote partition's AMO */
864 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
865 (1UL << sn_partition_id));
866 /*
867 * We must always use the nofault function regardless of whether we
868 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
869 * didn't, we'd never know that the other partition is down and would
870 * keep sending IPIs and AMOs to it until the heartbeat times out.
871 */
872 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
873 variable), xp_nofault_PIOR_target));
874
875 local_irq_restore(irq_flags);
876}
877
878static inline void
879xpc_cancel_partition_disengage_request(struct xpc_partition *part)
880{
881 unsigned long irq_flags;
882 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
883 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
884
885
886 local_irq_save(irq_flags);
887
888 /* clear bit corresponding to our partid in remote partition's AMO */
889 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
890 ~(1UL << sn_partition_id));
891 /*
892 * We must always use the nofault function regardless of whether we
893 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
894 * didn't, we'd never know that the other partition is down and would
895 * keep sending IPIs and AMOs to it until the heartbeat times out.
896 */
897 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
898 variable), xp_nofault_PIOR_target));
899
900 local_irq_restore(irq_flags);
901}
902
903static inline u64
904xpc_partition_engaged(u64 partid_mask)
905{
906 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
907
908
909 /* return our partition's AMO variable ANDed with partid_mask */
910 return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
911 partid_mask);
912}
913
914static inline u64
915xpc_partition_disengage_requested(u64 partid_mask)
916{
917 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
918
919
920 /* return our partition's AMO variable ANDed with partid_mask */
921 return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
922 partid_mask);
923}
924
925static inline void
926xpc_clear_partition_engaged(u64 partid_mask)
927{
928 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
929
930
931 /* clear bit(s) based on partid_mask in our partition's AMO */
932 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
933 ~partid_mask);
934}
935
936static inline void
937xpc_clear_partition_disengage_request(u64 partid_mask)
938{
939 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
940
941
942 /* clear bit(s) based on partid_mask in our partition's AMO */
943 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
944 ~partid_mask);
945}
946
947
948
949/*
677 * The following set of macros and inlines are used for the sending and 950 * The following set of macros and inlines are used for the sending and
678 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs, 951 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
679 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and 952 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
@@ -722,13 +995,13 @@ xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
722 * Flag the appropriate AMO variable and send an IPI to the specified node. 995 * Flag the appropriate AMO variable and send an IPI to the specified node.
723 */ 996 */
724static inline void 997static inline void
725xpc_activate_IRQ_send(u64 amos_page, int from_nasid, int to_nasid, 998xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
726 int to_phys_cpuid) 999 int to_phys_cpuid)
727{ 1000{
728 int w_index = XPC_NASID_W_INDEX(from_nasid); 1001 int w_index = XPC_NASID_W_INDEX(from_nasid);
729 int b_index = XPC_NASID_B_INDEX(from_nasid); 1002 int b_index = XPC_NASID_B_INDEX(from_nasid);
730 AMO_t *amos = (AMO_t *) __va(amos_page + 1003 AMO_t *amos = (AMO_t *) __va(amos_page_pa +
731 (XP_MAX_PARTITIONS * sizeof(AMO_t))); 1004 (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
732 1005
733 1006
734 (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid, 1007 (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
@@ -756,6 +1029,13 @@ xpc_IPI_send_reactivate(struct xpc_partition *part)
756 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid); 1029 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
757} 1030}
758 1031
1032static inline void
1033xpc_IPI_send_disengage(struct xpc_partition *part)
1034{
1035 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
1036 part->remote_act_nasid, part->remote_act_phys_cpuid);
1037}
1038
759 1039
760/* 1040/*
761 * IPIs associated with SGI_XPC_NOTIFY IRQ. 1041 * IPIs associated with SGI_XPC_NOTIFY IRQ.
@@ -836,6 +1116,7 @@ xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
836 1116
837/* given an AMO variable and a channel#, get its associated IPI flags */ 1117/* given an AMO variable and a channel#, get its associated IPI flags */
838#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff)) 1118#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
/*
 * given an AMO variable and a channel#, OR the flags _f into that channel's
 * byte; the whole expansion is parenthesized so it can be used safely inside
 * a larger expression
 */
#define XPC_SET_IPI_FLAGS(_amo, _c, _f)  ((_amo) |= ((u64) (_f) << ((_c) * 8)))
839 1120
840#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f) 1121#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & 0x0f0f0f0f0f0f0f0f)
841#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010) 1122#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & 0x1010101010101010)
@@ -903,17 +1184,18 @@ xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
903 * cacheable mapping for the entire region. This will prevent speculative 1184 * cacheable mapping for the entire region. This will prevent speculative
904 * reading of cached copies of our lines from being issued which will cause 1185 * reading of cached copies of our lines from being issued which will cause
905 * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 1186 * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
906 * (XP_MAX_PARTITIONS) AMO variables for message notification (xpc_main.c) 1187 * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
907 * and an additional 16 AMO variables for partition activation (xpc_hb.c). 1188 * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
1189 * activation and 2 AMO variables for partition deactivation.
908 */ 1190 */
909static inline AMO_t * 1191static inline AMO_t *
910xpc_IPI_init(partid_t partid) 1192xpc_IPI_init(int index)
911{ 1193{
912 AMO_t *part_amo = xpc_vars->amos_page + partid; 1194 AMO_t *amo = xpc_vars->amos_page + index;
913 1195
914 1196
915 xpc_IPI_receive(part_amo); 1197 (void) xpc_IPI_receive(amo); /* clear AMO variable */
916 return part_amo; 1198 return amo;
917} 1199}
918 1200
919 1201
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 94698bea7be0..abf4fc2a87bb 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -57,6 +57,7 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
57 57
58 spin_lock_init(&ch->lock); 58 spin_lock_init(&ch->lock);
59 sema_init(&ch->msg_to_pull_sema, 1); /* mutex */ 59 sema_init(&ch->msg_to_pull_sema, 1); /* mutex */
60 sema_init(&ch->wdisconnect_sema, 0); /* event wait */
60 61
61 atomic_set(&ch->n_on_msg_allocate_wq, 0); 62 atomic_set(&ch->n_on_msg_allocate_wq, 0);
62 init_waitqueue_head(&ch->msg_allocate_wq); 63 init_waitqueue_head(&ch->msg_allocate_wq);
@@ -166,6 +167,7 @@ xpc_setup_infrastructure(struct xpc_partition *part)
166 xpc_initialize_channels(part, partid); 167 xpc_initialize_channels(part, partid);
167 168
168 atomic_set(&part->nchannels_active, 0); 169 atomic_set(&part->nchannels_active, 0);
170 atomic_set(&part->nchannels_engaged, 0);
169 171
170 172
171 /* local_IPI_amo were set to 0 by an earlier memset() */ 173 /* local_IPI_amo were set to 0 by an earlier memset() */
@@ -555,8 +557,6 @@ xpc_allocate_msgqueues(struct xpc_channel *ch)
555 sema_init(&ch->notify_queue[i].sema, 0); 557 sema_init(&ch->notify_queue[i].sema, 0);
556 } 558 }
557 559
558 sema_init(&ch->teardown_sema, 0); /* event wait */
559
560 spin_lock_irqsave(&ch->lock, irq_flags); 560 spin_lock_irqsave(&ch->lock, irq_flags);
561 ch->flags |= XPC_C_SETUP; 561 ch->flags |= XPC_C_SETUP;
562 spin_unlock_irqrestore(&ch->lock, irq_flags); 562 spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -626,6 +626,55 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
626 626
627 627
628/* 628/*
629 * Notify those who wanted to be notified upon delivery of their message.
630 */
631static void
632xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
633{
634 struct xpc_notify *notify;
635 u8 notify_type;
636 s64 get = ch->w_remote_GP.get - 1;
637
638
639 while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
640
641 notify = &ch->notify_queue[get % ch->local_nentries];
642
643 /*
644 * See if the notify entry indicates it was associated with
645 * a message who's sender wants to be notified. It is possible
646 * that it is, but someone else is doing or has done the
647 * notification.
648 */
649 notify_type = notify->type;
650 if (notify_type == 0 ||
651 cmpxchg(&notify->type, notify_type, 0) !=
652 notify_type) {
653 continue;
654 }
655
656 DBUG_ON(notify_type != XPC_N_CALL);
657
658 atomic_dec(&ch->n_to_notify);
659
660 if (notify->func != NULL) {
661 dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
662 "msg_number=%ld, partid=%d, channel=%d\n",
663 (void *) notify, get, ch->partid, ch->number);
664
665 notify->func(reason, ch->partid, ch->number,
666 notify->key);
667
668 dev_dbg(xpc_chan, "notify->func() returned, "
669 "notify=0x%p, msg_number=%ld, partid=%d, "
670 "channel=%d\n", (void *) notify, get,
671 ch->partid, ch->number);
672 }
673 }
674}
675
676
677/*
629 * Free up message queues and other stuff that were allocated for the specified 678 * Free up message queues and other stuff that were allocated for the specified
630 * channel. 679 * channel.
631 * 680 *
@@ -669,9 +718,6 @@ xpc_free_msgqueues(struct xpc_channel *ch)
669 ch->remote_msgqueue = NULL; 718 ch->remote_msgqueue = NULL;
670 kfree(ch->notify_queue); 719 kfree(ch->notify_queue);
671 ch->notify_queue = NULL; 720 ch->notify_queue = NULL;
672
673 /* in case someone is waiting for the teardown to complete */
674 up(&ch->teardown_sema);
675 } 721 }
676} 722}
677 723
@@ -683,7 +729,7 @@ static void
683xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) 729xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
684{ 730{
685 struct xpc_partition *part = &xpc_partitions[ch->partid]; 731 struct xpc_partition *part = &xpc_partitions[ch->partid];
686 u32 ch_flags = ch->flags; 732 u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
687 733
688 734
689 DBUG_ON(!spin_is_locked(&ch->lock)); 735 DBUG_ON(!spin_is_locked(&ch->lock));
@@ -701,12 +747,13 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
701 } 747 }
702 DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); 748 DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
703 749
704 /* it's now safe to free the channel's message queues */ 750 if (part->act_state == XPC_P_DEACTIVATING) {
705 751 /* can't proceed until the other side disengages from us */
706 xpc_free_msgqueues(ch); 752 if (xpc_partition_engaged(1UL << ch->partid)) {
707 DBUG_ON(ch->flags & XPC_C_SETUP); 753 return;
754 }
708 755
709 if (part->act_state != XPC_P_DEACTIVATING) { 756 } else {
710 757
711 /* as long as the other side is up do the full protocol */ 758 /* as long as the other side is up do the full protocol */
712 759
@@ -724,16 +771,42 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
724 } 771 }
725 } 772 }
726 773
774 /* wake those waiting for notify completion */
775 if (atomic_read(&ch->n_to_notify) > 0) {
776 /* >>> we do callout while holding ch->lock */
777 xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
778 }
779
727 /* both sides are disconnected now */ 780 /* both sides are disconnected now */
728 781
729 ch->flags = XPC_C_DISCONNECTED; /* clear all flags, but this one */ 782 /* it's now safe to free the channel's message queues */
783 xpc_free_msgqueues(ch);
784
785 /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
786 ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
730 787
731 atomic_dec(&part->nchannels_active); 788 atomic_dec(&part->nchannels_active);
732 789
733 if (ch_flags & XPC_C_WASCONNECTED) { 790 if (channel_was_connected) {
734 dev_info(xpc_chan, "channel %d to partition %d disconnected, " 791 dev_info(xpc_chan, "channel %d to partition %d disconnected, "
735 "reason=%d\n", ch->number, ch->partid, ch->reason); 792 "reason=%d\n", ch->number, ch->partid, ch->reason);
736 } 793 }
794
795 if (ch->flags & XPC_C_WDISCONNECT) {
796 spin_unlock_irqrestore(&ch->lock, *irq_flags);
797 up(&ch->wdisconnect_sema);
798 spin_lock_irqsave(&ch->lock, *irq_flags);
799
800 } else if (ch->delayed_IPI_flags) {
801 if (part->act_state != XPC_P_DEACTIVATING) {
802 /* time to take action on any delayed IPI flags */
803 spin_lock(&part->IPI_lock);
804 XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
805 ch->delayed_IPI_flags);
806 spin_unlock(&part->IPI_lock);
807 }
808 ch->delayed_IPI_flags = 0;
809 }
737} 810}
738 811
739 812
@@ -754,6 +827,19 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
754 827
755 spin_lock_irqsave(&ch->lock, irq_flags); 828 spin_lock_irqsave(&ch->lock, irq_flags);
756 829
830again:
831
832 if ((ch->flags & XPC_C_DISCONNECTED) &&
833 (ch->flags & XPC_C_WDISCONNECT)) {
834 /*
835 * Delay processing IPI flags until thread waiting disconnect
836 * has had a chance to see that the channel is disconnected.
837 */
838 ch->delayed_IPI_flags |= IPI_flags;
839 spin_unlock_irqrestore(&ch->lock, irq_flags);
840 return;
841 }
842
757 843
758 if (IPI_flags & XPC_IPI_CLOSEREQUEST) { 844 if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
759 845
@@ -764,7 +850,7 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
764 /* 850 /*
765 * If RCLOSEREQUEST is set, we're probably waiting for 851 * If RCLOSEREQUEST is set, we're probably waiting for
766 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed 852 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
767 * with this RCLOSEQREUQEST in the IPI_flags. 853 * with this RCLOSEREQUEST in the IPI_flags.
768 */ 854 */
769 855
770 if (ch->flags & XPC_C_RCLOSEREQUEST) { 856 if (ch->flags & XPC_C_RCLOSEREQUEST) {
@@ -779,14 +865,22 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
779 865
780 /* both sides have finished disconnecting */ 866 /* both sides have finished disconnecting */
781 xpc_process_disconnect(ch, &irq_flags); 867 xpc_process_disconnect(ch, &irq_flags);
868 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
869 goto again;
782 } 870 }
783 871
784 if (ch->flags & XPC_C_DISCONNECTED) { 872 if (ch->flags & XPC_C_DISCONNECTED) {
785 // >>> explain this section
786
787 if (!(IPI_flags & XPC_IPI_OPENREQUEST)) { 873 if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
788 DBUG_ON(part->act_state != 874 if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
789 XPC_P_DEACTIVATING); 875 ch_number) & XPC_IPI_OPENREQUEST)) {
876
877 DBUG_ON(ch->delayed_IPI_flags != 0);
878 spin_lock(&part->IPI_lock);
879 XPC_SET_IPI_FLAGS(part->local_IPI_amo,
880 ch_number,
881 XPC_IPI_CLOSEREQUEST);
882 spin_unlock(&part->IPI_lock);
883 }
790 spin_unlock_irqrestore(&ch->lock, irq_flags); 884 spin_unlock_irqrestore(&ch->lock, irq_flags);
791 return; 885 return;
792 } 886 }
@@ -816,9 +910,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
816 } 910 }
817 911
818 XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); 912 XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
819 } else { 913
820 xpc_process_disconnect(ch, &irq_flags); 914 DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
915 spin_unlock_irqrestore(&ch->lock, irq_flags);
916 return;
821 } 917 }
918
919 xpc_process_disconnect(ch, &irq_flags);
822 } 920 }
823 921
824 922
@@ -834,7 +932,20 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
834 } 932 }
835 933
836 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); 934 DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
837 DBUG_ON(!(ch->flags & XPC_C_RCLOSEREQUEST)); 935
936 if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
937 if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
938 & XPC_IPI_CLOSEREQUEST)) {
939
940 DBUG_ON(ch->delayed_IPI_flags != 0);
941 spin_lock(&part->IPI_lock);
942 XPC_SET_IPI_FLAGS(part->local_IPI_amo,
943 ch_number, XPC_IPI_CLOSEREPLY);
944 spin_unlock(&part->IPI_lock);
945 }
946 spin_unlock_irqrestore(&ch->lock, irq_flags);
947 return;
948 }
838 949
839 ch->flags |= XPC_C_RCLOSEREPLY; 950 ch->flags |= XPC_C_RCLOSEREPLY;
840 951
@@ -852,8 +963,14 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
852 "channel=%d\n", args->msg_size, args->local_nentries, 963 "channel=%d\n", args->msg_size, args->local_nentries,
853 ch->partid, ch->number); 964 ch->partid, ch->number);
854 965
855 if ((ch->flags & XPC_C_DISCONNECTING) || 966 if (part->act_state == XPC_P_DEACTIVATING ||
856 part->act_state == XPC_P_DEACTIVATING) { 967 (ch->flags & XPC_C_ROPENREQUEST)) {
968 spin_unlock_irqrestore(&ch->lock, irq_flags);
969 return;
970 }
971
972 if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
973 ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
857 spin_unlock_irqrestore(&ch->lock, irq_flags); 974 spin_unlock_irqrestore(&ch->lock, irq_flags);
858 return; 975 return;
859 } 976 }
@@ -867,8 +984,11 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
867 * msg_size = size of channel's messages in bytes 984 * msg_size = size of channel's messages in bytes
868 * local_nentries = remote partition's local_nentries 985 * local_nentries = remote partition's local_nentries
869 */ 986 */
870 DBUG_ON(args->msg_size == 0); 987 if (args->msg_size == 0 || args->local_nentries == 0) {
871 DBUG_ON(args->local_nentries == 0); 988 /* assume OPENREQUEST was delayed by mistake */
989 spin_unlock_irqrestore(&ch->lock, irq_flags);
990 return;
991 }
872 992
873 ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); 993 ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
874 ch->remote_nentries = args->local_nentries; 994 ch->remote_nentries = args->local_nentries;
@@ -906,7 +1026,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
906 spin_unlock_irqrestore(&ch->lock, irq_flags); 1026 spin_unlock_irqrestore(&ch->lock, irq_flags);
907 return; 1027 return;
908 } 1028 }
909 DBUG_ON(!(ch->flags & XPC_C_OPENREQUEST)); 1029 if (!(ch->flags & XPC_C_OPENREQUEST)) {
1030 XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError,
1031 &irq_flags);
1032 spin_unlock_irqrestore(&ch->lock, irq_flags);
1033 return;
1034 }
1035
910 DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); 1036 DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
911 DBUG_ON(ch->flags & XPC_C_CONNECTED); 1037 DBUG_ON(ch->flags & XPC_C_CONNECTED);
912 1038
@@ -960,8 +1086,8 @@ xpc_connect_channel(struct xpc_channel *ch)
960 struct xpc_registration *registration = &xpc_registrations[ch->number]; 1086 struct xpc_registration *registration = &xpc_registrations[ch->number];
961 1087
962 1088
963 if (down_interruptible(&registration->sema) != 0) { 1089 if (down_trylock(&registration->sema) != 0) {
964 return xpcInterrupted; 1090 return xpcRetry;
965 } 1091 }
966 1092
967 if (!XPC_CHANNEL_REGISTERED(ch->number)) { 1093 if (!XPC_CHANNEL_REGISTERED(ch->number)) {
@@ -1040,55 +1166,6 @@ xpc_connect_channel(struct xpc_channel *ch)
1040 1166
1041 1167
1042/* 1168/*
1043 * Notify those who wanted to be notified upon delivery of their message.
1044 */
1045static void
1046xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
1047{
1048 struct xpc_notify *notify;
1049 u8 notify_type;
1050 s64 get = ch->w_remote_GP.get - 1;
1051
1052
1053 while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
1054
1055 notify = &ch->notify_queue[get % ch->local_nentries];
1056
1057 /*
1058 * See if the notify entry indicates it was associated with
1059 * a message who's sender wants to be notified. It is possible
1060 * that it is, but someone else is doing or has done the
1061 * notification.
1062 */
1063 notify_type = notify->type;
1064 if (notify_type == 0 ||
1065 cmpxchg(&notify->type, notify_type, 0) !=
1066 notify_type) {
1067 continue;
1068 }
1069
1070 DBUG_ON(notify_type != XPC_N_CALL);
1071
1072 atomic_dec(&ch->n_to_notify);
1073
1074 if (notify->func != NULL) {
1075 dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
1076 "msg_number=%ld, partid=%d, channel=%d\n",
1077 (void *) notify, get, ch->partid, ch->number);
1078
1079 notify->func(reason, ch->partid, ch->number,
1080 notify->key);
1081
1082 dev_dbg(xpc_chan, "notify->func() returned, "
1083 "notify=0x%p, msg_number=%ld, partid=%d, "
1084 "channel=%d\n", (void *) notify, get,
1085 ch->partid, ch->number);
1086 }
1087 }
1088}
1089
1090
1091/*
1092 * Clear some of the msg flags in the local message queue. 1169 * Clear some of the msg flags in the local message queue.
1093 */ 1170 */
1094static inline void 1171static inline void
@@ -1240,6 +1317,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
1240 u64 IPI_amo, IPI_flags; 1317 u64 IPI_amo, IPI_flags;
1241 struct xpc_channel *ch; 1318 struct xpc_channel *ch;
1242 int ch_number; 1319 int ch_number;
1320 u32 ch_flags;
1243 1321
1244 1322
1245 IPI_amo = xpc_get_IPI_flags(part); 1323 IPI_amo = xpc_get_IPI_flags(part);
@@ -1266,8 +1344,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
1266 xpc_process_openclose_IPI(part, ch_number, IPI_flags); 1344 xpc_process_openclose_IPI(part, ch_number, IPI_flags);
1267 } 1345 }
1268 1346
1347 ch_flags = ch->flags; /* need an atomic snapshot of flags */
1269 1348
1270 if (ch->flags & XPC_C_DISCONNECTING) { 1349 if (ch_flags & XPC_C_DISCONNECTING) {
1271 spin_lock_irqsave(&ch->lock, irq_flags); 1350 spin_lock_irqsave(&ch->lock, irq_flags);
1272 xpc_process_disconnect(ch, &irq_flags); 1351 xpc_process_disconnect(ch, &irq_flags);
1273 spin_unlock_irqrestore(&ch->lock, irq_flags); 1352 spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1278,9 +1357,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
1278 continue; 1357 continue;
1279 } 1358 }
1280 1359
1281 if (!(ch->flags & XPC_C_CONNECTED)) { 1360 if (!(ch_flags & XPC_C_CONNECTED)) {
1282 if (!(ch->flags & XPC_C_OPENREQUEST)) { 1361 if (!(ch_flags & XPC_C_OPENREQUEST)) {
1283 DBUG_ON(ch->flags & XPC_C_SETUP); 1362 DBUG_ON(ch_flags & XPC_C_SETUP);
1284 (void) xpc_connect_channel(ch); 1363 (void) xpc_connect_channel(ch);
1285 } else { 1364 } else {
1286 spin_lock_irqsave(&ch->lock, irq_flags); 1365 spin_lock_irqsave(&ch->lock, irq_flags);
@@ -1305,8 +1384,8 @@ xpc_process_channel_activity(struct xpc_partition *part)
1305 1384
1306 1385
1307/* 1386/*
1308 * XPC's heartbeat code calls this function to inform XPC that a partition has 1387 * XPC's heartbeat code calls this function to inform XPC that a partition is
1309 * gone down. XPC responds by tearing down the XPartition Communication 1388 * going down. XPC responds by tearing down the XPartition Communication
1310 * infrastructure used for the just downed partition. 1389 * infrastructure used for the just downed partition.
1311 * 1390 *
1312 * XPC's heartbeat code will never call this function and xpc_partition_up() 1391 * XPC's heartbeat code will never call this function and xpc_partition_up()
@@ -1314,7 +1393,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
1314 * at the same time. 1393 * at the same time.
1315 */ 1394 */
1316void 1395void
1317xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason) 1396xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
1318{ 1397{
1319 unsigned long irq_flags; 1398 unsigned long irq_flags;
1320 int ch_number; 1399 int ch_number;
@@ -1330,12 +1409,11 @@ xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
1330 } 1409 }
1331 1410
1332 1411
1333 /* disconnect all channels associated with the downed partition */ 1412 /* disconnect channels associated with the partition going down */
1334 1413
1335 for (ch_number = 0; ch_number < part->nchannels; ch_number++) { 1414 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
1336 ch = &part->channels[ch_number]; 1415 ch = &part->channels[ch_number];
1337 1416
1338
1339 xpc_msgqueue_ref(ch); 1417 xpc_msgqueue_ref(ch);
1340 spin_lock_irqsave(&ch->lock, irq_flags); 1418 spin_lock_irqsave(&ch->lock, irq_flags);
1341 1419
@@ -1370,6 +1448,7 @@ xpc_teardown_infrastructure(struct xpc_partition *part)
1370 * this partition. 1448 * this partition.
1371 */ 1449 */
1372 1450
1451 DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
1373 DBUG_ON(atomic_read(&part->nchannels_active) != 0); 1452 DBUG_ON(atomic_read(&part->nchannels_active) != 0);
1374 DBUG_ON(part->setup_state != XPC_P_SETUP); 1453 DBUG_ON(part->setup_state != XPC_P_SETUP);
1375 part->setup_state = XPC_P_WTEARDOWN; 1454 part->setup_state = XPC_P_WTEARDOWN;
@@ -1428,19 +1507,11 @@ xpc_initiate_connect(int ch_number)
1428 if (xpc_part_ref(part)) { 1507 if (xpc_part_ref(part)) {
1429 ch = &part->channels[ch_number]; 1508 ch = &part->channels[ch_number];
1430 1509
1431 if (!(ch->flags & XPC_C_DISCONNECTING)) { 1510 /*
1432 DBUG_ON(ch->flags & XPC_C_OPENREQUEST); 1511 * Initiate the establishment of a connection on the
1433 DBUG_ON(ch->flags & XPC_C_CONNECTED); 1512 * newly registered channel to the remote partition.
1434 DBUG_ON(ch->flags & XPC_C_SETUP); 1513 */
1435 1514 xpc_wakeup_channel_mgr(part);
1436 /*
1437 * Initiate the establishment of a connection
1438 * on the newly registered channel to the
1439 * remote partition.
1440 */
1441 xpc_wakeup_channel_mgr(part);
1442 }
1443
1444 xpc_part_deref(part); 1515 xpc_part_deref(part);
1445 } 1516 }
1446 } 1517 }
@@ -1450,9 +1521,6 @@ xpc_initiate_connect(int ch_number)
1450void 1521void
1451xpc_connected_callout(struct xpc_channel *ch) 1522xpc_connected_callout(struct xpc_channel *ch)
1452{ 1523{
1453 unsigned long irq_flags;
1454
1455
1456 /* let the registerer know that a connection has been established */ 1524 /* let the registerer know that a connection has been established */
1457 1525
1458 if (ch->func != NULL) { 1526 if (ch->func != NULL) {
@@ -1465,10 +1533,6 @@ xpc_connected_callout(struct xpc_channel *ch)
1465 dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, " 1533 dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, "
1466 "partid=%d, channel=%d\n", ch->partid, ch->number); 1534 "partid=%d, channel=%d\n", ch->partid, ch->number);
1467 } 1535 }
1468
1469 spin_lock_irqsave(&ch->lock, irq_flags);
1470 ch->flags |= XPC_C_CONNECTCALLOUT;
1471 spin_unlock_irqrestore(&ch->lock, irq_flags);
1472} 1536}
1473 1537
1474 1538
@@ -1506,8 +1570,12 @@ xpc_initiate_disconnect(int ch_number)
1506 1570
1507 spin_lock_irqsave(&ch->lock, irq_flags); 1571 spin_lock_irqsave(&ch->lock, irq_flags);
1508 1572
1509 XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering, 1573 if (!(ch->flags & XPC_C_DISCONNECTED)) {
1574 ch->flags |= XPC_C_WDISCONNECT;
1575
1576 XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
1510 &irq_flags); 1577 &irq_flags);
1578 }
1511 1579
1512 spin_unlock_irqrestore(&ch->lock, irq_flags); 1580 spin_unlock_irqrestore(&ch->lock, irq_flags);
1513 1581
@@ -1523,8 +1591,9 @@ xpc_initiate_disconnect(int ch_number)
1523/* 1591/*
1524 * To disconnect a channel, and reflect it back to all who may be waiting. 1592 * To disconnect a channel, and reflect it back to all who may be waiting.
1525 * 1593 *
1526 * >>> An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by 1594 * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
1527 * >>> xpc_free_msgqueues(). 1595 * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
1596 * xpc_disconnect_wait().
1528 * 1597 *
1529 * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. 1598 * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
1530 */ 1599 */
@@ -1532,7 +1601,7 @@ void
1532xpc_disconnect_channel(const int line, struct xpc_channel *ch, 1601xpc_disconnect_channel(const int line, struct xpc_channel *ch,
1533 enum xpc_retval reason, unsigned long *irq_flags) 1602 enum xpc_retval reason, unsigned long *irq_flags)
1534{ 1603{
1535 u32 flags; 1604 u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
1536 1605
1537 1606
1538 DBUG_ON(!spin_is_locked(&ch->lock)); 1607 DBUG_ON(!spin_is_locked(&ch->lock));
@@ -1547,61 +1616,53 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
1547 1616
1548 XPC_SET_REASON(ch, reason, line); 1617 XPC_SET_REASON(ch, reason, line);
1549 1618
1550 flags = ch->flags; 1619 ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
1551 /* some of these may not have been set */ 1620 /* some of these may not have been set */
1552 ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | 1621 ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
1553 XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | 1622 XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
1554 XPC_C_CONNECTING | XPC_C_CONNECTED); 1623 XPC_C_CONNECTING | XPC_C_CONNECTED);
1555 1624
1556 ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
1557 xpc_IPI_send_closerequest(ch, irq_flags); 1625 xpc_IPI_send_closerequest(ch, irq_flags);
1558 1626
1559 if (flags & XPC_C_CONNECTED) { 1627 if (channel_was_connected) {
1560 ch->flags |= XPC_C_WASCONNECTED; 1628 ch->flags |= XPC_C_WASCONNECTED;
1561 } 1629 }
1562 1630
1631 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1632
1633 /* wake all idle kthreads so they can exit */
1563 if (atomic_read(&ch->kthreads_idle) > 0) { 1634 if (atomic_read(&ch->kthreads_idle) > 0) {
1564 /* wake all idle kthreads so they can exit */
1565 wake_up_all(&ch->idle_wq); 1635 wake_up_all(&ch->idle_wq);
1566 } 1636 }
1567 1637
1568 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1569
1570
1571 /* wake those waiting to allocate an entry from the local msg queue */ 1638 /* wake those waiting to allocate an entry from the local msg queue */
1572
1573 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { 1639 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
1574 wake_up(&ch->msg_allocate_wq); 1640 wake_up(&ch->msg_allocate_wq);
1575 } 1641 }
1576 1642
1577 /* wake those waiting for notify completion */
1578
1579 if (atomic_read(&ch->n_to_notify) > 0) {
1580 xpc_notify_senders(ch, reason, ch->w_local_GP.put);
1581 }
1582
1583 spin_lock_irqsave(&ch->lock, *irq_flags); 1643 spin_lock_irqsave(&ch->lock, *irq_flags);
1584} 1644}
1585 1645
1586 1646
1587void 1647void
1588xpc_disconnected_callout(struct xpc_channel *ch) 1648xpc_disconnecting_callout(struct xpc_channel *ch)
1589{ 1649{
1590 /* 1650 /*
1591 * Let the channel's registerer know that the channel is now 1651 * Let the channel's registerer know that the channel is being
1592 * disconnected. We don't want to do this if the registerer was never 1652 * disconnected. We don't want to do this if the registerer was never
1593 * informed of a connection being made, unless the disconnect was for 1653 * informed of a connection being made.
1594 * abnormal reasons.
1595 */ 1654 */
1596 1655
1597 if (ch->func != NULL) { 1656 if (ch->func != NULL) {
1598 dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " 1657 dev_dbg(xpc_chan, "ch->func() called, reason=xpcDisconnecting,"
1599 "channel=%d\n", ch->reason, ch->partid, ch->number); 1658 " partid=%d, channel=%d\n", ch->partid, ch->number);
1600 1659
1601 ch->func(ch->reason, ch->partid, ch->number, NULL, ch->key); 1660 ch->func(xpcDisconnecting, ch->partid, ch->number, NULL,
1661 ch->key);
1602 1662
1603 dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " 1663 dev_dbg(xpc_chan, "ch->func() returned, reason="
1604 "channel=%d\n", ch->reason, ch->partid, ch->number); 1664 "xpcDisconnecting, partid=%d, channel=%d\n",
1665 ch->partid, ch->number);
1605 } 1666 }
1606} 1667}
1607 1668
@@ -1848,7 +1909,7 @@ xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
1848 xpc_notify_func func, void *key) 1909 xpc_notify_func func, void *key)
1849{ 1910{
1850 enum xpc_retval ret = xpcSuccess; 1911 enum xpc_retval ret = xpcSuccess;
1851 struct xpc_notify *notify = NULL; // >>> to keep the compiler happy!! 1912 struct xpc_notify *notify = notify;
1852 s64 put, msg_number = msg->number; 1913 s64 put, msg_number = msg->number;
1853 1914
1854 1915
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index ed7c21586e98..cece3c7c69be 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -54,6 +54,7 @@
54#include <linux/interrupt.h> 54#include <linux/interrupt.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
57#include <linux/reboot.h>
57#include <asm/sn/intr.h> 58#include <asm/sn/intr.h>
58#include <asm/sn/sn_sal.h> 59#include <asm/sn/sn_sal.h>
59#include <asm/uaccess.h> 60#include <asm/uaccess.h>
@@ -82,11 +83,17 @@ struct device *xpc_chan = &xpc_chan_dbg_subname;
82 83
83/* systune related variables for /proc/sys directories */ 84/* systune related variables for /proc/sys directories */
84 85
85static int xpc_hb_min = 1; 86static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
86static int xpc_hb_max = 10; 87static int xpc_hb_min_interval = 1;
88static int xpc_hb_max_interval = 10;
87 89
88static int xpc_hb_check_min = 10; 90static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
89static int xpc_hb_check_max = 120; 91static int xpc_hb_check_min_interval = 10;
92static int xpc_hb_check_max_interval = 120;
93
94int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
95static int xpc_disengage_request_min_timelimit = 0;
96static int xpc_disengage_request_max_timelimit = 120;
90 97
91static ctl_table xpc_sys_xpc_hb_dir[] = { 98static ctl_table xpc_sys_xpc_hb_dir[] = {
92 { 99 {
@@ -99,7 +106,8 @@ static ctl_table xpc_sys_xpc_hb_dir[] = {
99 &proc_dointvec_minmax, 106 &proc_dointvec_minmax,
100 &sysctl_intvec, 107 &sysctl_intvec,
101 NULL, 108 NULL,
102 &xpc_hb_min, &xpc_hb_max 109 &xpc_hb_min_interval,
110 &xpc_hb_max_interval
103 }, 111 },
104 { 112 {
105 2, 113 2,
@@ -111,7 +119,8 @@ static ctl_table xpc_sys_xpc_hb_dir[] = {
111 &proc_dointvec_minmax, 119 &proc_dointvec_minmax,
112 &sysctl_intvec, 120 &sysctl_intvec,
113 NULL, 121 NULL,
114 &xpc_hb_check_min, &xpc_hb_check_max 122 &xpc_hb_check_min_interval,
123 &xpc_hb_check_max_interval
115 }, 124 },
116 {0} 125 {0}
117}; 126};
@@ -124,6 +133,19 @@ static ctl_table xpc_sys_xpc_dir[] = {
124 0555, 133 0555,
125 xpc_sys_xpc_hb_dir 134 xpc_sys_xpc_hb_dir
126 }, 135 },
136 {
137 2,
138 "disengage_request_timelimit",
139 &xpc_disengage_request_timelimit,
140 sizeof(int),
141 0644,
142 NULL,
143 &proc_dointvec_minmax,
144 &sysctl_intvec,
145 NULL,
146 &xpc_disengage_request_min_timelimit,
147 &xpc_disengage_request_max_timelimit
148 },
127 {0} 149 {0}
128}; 150};
129static ctl_table xpc_sys_dir[] = { 151static ctl_table xpc_sys_dir[] = {
@@ -148,10 +170,10 @@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
148 170
149static unsigned long xpc_hb_check_timeout; 171static unsigned long xpc_hb_check_timeout;
150 172
151/* xpc_hb_checker thread exited notification */ 173/* notification that the xpc_hb_checker thread has exited */
152static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited); 174static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
153 175
154/* xpc_discovery thread exited notification */ 176/* notification that the xpc_discovery thread has exited */
155static DECLARE_MUTEX_LOCKED(xpc_discovery_exited); 177static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
156 178
157 179
@@ -161,6 +183,30 @@ static struct timer_list xpc_hb_timer;
161static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); 183static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
162 184
163 185
186static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
187static struct notifier_block xpc_reboot_notifier = {
188 .notifier_call = xpc_system_reboot,
189};
190
191
192/*
193 * Timer function to enforce the timelimit on the partition disengage request.
194 */
195static void
196xpc_timeout_partition_disengage_request(unsigned long data)
197{
198 struct xpc_partition *part = (struct xpc_partition *) data;
199
200
201 DBUG_ON(jiffies < part->disengage_request_timeout);
202
203 (void) xpc_partition_disengaged(part);
204
205 DBUG_ON(part->disengage_request_timeout != 0);
206 DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
207}
208
209
164/* 210/*
165 * Notify the heartbeat check thread that an IRQ has been received. 211 * Notify the heartbeat check thread that an IRQ has been received.
166 */ 212 */
@@ -214,12 +260,6 @@ xpc_hb_checker(void *ignore)
214 260
215 while (!(volatile int) xpc_exiting) { 261 while (!(volatile int) xpc_exiting) {
216 262
217 /* wait for IRQ or timeout */
218 (void) wait_event_interruptible(xpc_act_IRQ_wq,
219 (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
220 jiffies >= xpc_hb_check_timeout ||
221 (volatile int) xpc_exiting));
222
223 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " 263 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
224 "been received\n", 264 "been received\n",
225 (int) (xpc_hb_check_timeout - jiffies), 265 (int) (xpc_hb_check_timeout - jiffies),
@@ -240,6 +280,7 @@ xpc_hb_checker(void *ignore)
240 } 280 }
241 281
242 282
283 /* check for outstanding IRQs */
243 new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); 284 new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
244 if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { 285 if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
245 force_IRQ = 0; 286 force_IRQ = 0;
@@ -257,12 +298,18 @@ xpc_hb_checker(void *ignore)
257 xpc_hb_check_timeout = jiffies + 298 xpc_hb_check_timeout = jiffies +
258 (xpc_hb_check_interval * HZ); 299 (xpc_hb_check_interval * HZ);
259 } 300 }
301
302 /* wait for IRQ or timeout */
303 (void) wait_event_interruptible(xpc_act_IRQ_wq,
304 (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
305 jiffies >= xpc_hb_check_timeout ||
306 (volatile int) xpc_exiting));
260 } 307 }
261 308
262 dev_dbg(xpc_part, "heartbeat checker is exiting\n"); 309 dev_dbg(xpc_part, "heartbeat checker is exiting\n");
263 310
264 311
265 /* mark this thread as inactive */ 312 /* mark this thread as having exited */
266 up(&xpc_hb_checker_exited); 313 up(&xpc_hb_checker_exited);
267 return 0; 314 return 0;
268} 315}
@@ -282,7 +329,7 @@ xpc_initiate_discovery(void *ignore)
282 329
283 dev_dbg(xpc_part, "discovery thread is exiting\n"); 330 dev_dbg(xpc_part, "discovery thread is exiting\n");
284 331
285 /* mark this thread as inactive */ 332 /* mark this thread as having exited */
286 up(&xpc_discovery_exited); 333 up(&xpc_discovery_exited);
287 return 0; 334 return 0;
288} 335}
@@ -309,7 +356,7 @@ xpc_make_first_contact(struct xpc_partition *part)
309 "partition %d\n", XPC_PARTID(part)); 356 "partition %d\n", XPC_PARTID(part));
310 357
311 /* wait a 1/4 of a second or so */ 358 /* wait a 1/4 of a second or so */
312 msleep_interruptible(250); 359 (void) msleep_interruptible(250);
313 360
314 if (part->act_state == XPC_P_DEACTIVATING) { 361 if (part->act_state == XPC_P_DEACTIVATING) {
315 return part->reason; 362 return part->reason;
@@ -336,7 +383,8 @@ static void
336xpc_channel_mgr(struct xpc_partition *part) 383xpc_channel_mgr(struct xpc_partition *part)
337{ 384{
338 while (part->act_state != XPC_P_DEACTIVATING || 385 while (part->act_state != XPC_P_DEACTIVATING ||
339 atomic_read(&part->nchannels_active) > 0) { 386 atomic_read(&part->nchannels_active) > 0 ||
387 !xpc_partition_disengaged(part)) {
340 388
341 xpc_process_channel_activity(part); 389 xpc_process_channel_activity(part);
342 390
@@ -360,7 +408,8 @@ xpc_channel_mgr(struct xpc_partition *part)
360 (volatile u64) part->local_IPI_amo != 0 || 408 (volatile u64) part->local_IPI_amo != 0 ||
361 ((volatile u8) part->act_state == 409 ((volatile u8) part->act_state ==
362 XPC_P_DEACTIVATING && 410 XPC_P_DEACTIVATING &&
363 atomic_read(&part->nchannels_active) == 0))); 411 atomic_read(&part->nchannels_active) == 0 &&
412 xpc_partition_disengaged(part))));
364 atomic_set(&part->channel_mgr_requests, 1); 413 atomic_set(&part->channel_mgr_requests, 1);
365 414
366 // >>> Does it need to wakeup periodically as well? In case we 415 // >>> Does it need to wakeup periodically as well? In case we
@@ -482,7 +531,7 @@ xpc_activating(void *__partid)
482 return 0; 531 return 0;
483 } 532 }
484 533
485 XPC_ALLOW_HB(partid, xpc_vars); 534 xpc_allow_hb(partid, xpc_vars);
486 xpc_IPI_send_activated(part); 535 xpc_IPI_send_activated(part);
487 536
488 537
@@ -492,6 +541,7 @@ xpc_activating(void *__partid)
492 */ 541 */
493 (void) xpc_partition_up(part); 542 (void) xpc_partition_up(part);
494 543
544 xpc_disallow_hb(partid, xpc_vars);
495 xpc_mark_partition_inactive(part); 545 xpc_mark_partition_inactive(part);
496 546
497 if (part->reason == xpcReactivating) { 547 if (part->reason == xpcReactivating) {
@@ -670,6 +720,7 @@ xpc_daemonize_kthread(void *args)
670 struct xpc_partition *part = &xpc_partitions[partid]; 720 struct xpc_partition *part = &xpc_partitions[partid];
671 struct xpc_channel *ch; 721 struct xpc_channel *ch;
672 int n_needed; 722 int n_needed;
723 unsigned long irq_flags;
673 724
674 725
675 daemonize("xpc%02dc%d", partid, ch_number); 726 daemonize("xpc%02dc%d", partid, ch_number);
@@ -680,11 +731,14 @@ xpc_daemonize_kthread(void *args)
680 ch = &part->channels[ch_number]; 731 ch = &part->channels[ch_number];
681 732
682 if (!(ch->flags & XPC_C_DISCONNECTING)) { 733 if (!(ch->flags & XPC_C_DISCONNECTING)) {
683 DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
684 734
685 /* let registerer know that connection has been established */ 735 /* let registerer know that connection has been established */
686 736
687 if (atomic_read(&ch->kthreads_assigned) == 1) { 737 spin_lock_irqsave(&ch->lock, irq_flags);
738 if (!(ch->flags & XPC_C_CONNECTCALLOUT)) {
739 ch->flags |= XPC_C_CONNECTCALLOUT;
740 spin_unlock_irqrestore(&ch->lock, irq_flags);
741
688 xpc_connected_callout(ch); 742 xpc_connected_callout(ch);
689 743
690 /* 744 /*
@@ -699,16 +753,28 @@ xpc_daemonize_kthread(void *args)
699 !(ch->flags & XPC_C_DISCONNECTING)) { 753 !(ch->flags & XPC_C_DISCONNECTING)) {
700 xpc_activate_kthreads(ch, n_needed); 754 xpc_activate_kthreads(ch, n_needed);
701 } 755 }
756 } else {
757 spin_unlock_irqrestore(&ch->lock, irq_flags);
702 } 758 }
703 759
704 xpc_kthread_waitmsgs(part, ch); 760 xpc_kthread_waitmsgs(part, ch);
705 } 761 }
706 762
707 if (atomic_dec_return(&ch->kthreads_assigned) == 0 && 763 if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
708 ((ch->flags & XPC_C_CONNECTCALLOUT) || 764 spin_lock_irqsave(&ch->lock, irq_flags);
709 (ch->reason != xpcUnregistering && 765 if ((ch->flags & XPC_C_CONNECTCALLOUT) &&
710 ch->reason != xpcOtherUnregistering))) { 766 !(ch->flags & XPC_C_DISCONNECTCALLOUT)) {
711 xpc_disconnected_callout(ch); 767 ch->flags |= XPC_C_DISCONNECTCALLOUT;
768 spin_unlock_irqrestore(&ch->lock, irq_flags);
769
770 xpc_disconnecting_callout(ch);
771 } else {
772 spin_unlock_irqrestore(&ch->lock, irq_flags);
773 }
774 if (atomic_dec_return(&part->nchannels_engaged) == 0) {
775 xpc_mark_partition_disengaged(part);
776 xpc_IPI_send_disengage(part);
777 }
712 } 778 }
713 779
714 780
@@ -740,12 +806,33 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
740 unsigned long irq_flags; 806 unsigned long irq_flags;
741 pid_t pid; 807 pid_t pid;
742 u64 args = XPC_PACK_ARGS(ch->partid, ch->number); 808 u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
809 struct xpc_partition *part = &xpc_partitions[ch->partid];
743 810
744 811
745 while (needed-- > 0) { 812 while (needed-- > 0) {
813
814 /*
815 * The following is done on behalf of the newly created
816 * kthread. That kthread is responsible for doing the
817 * counterpart to the following before it exits.
818 */
819 (void) xpc_part_ref(part);
820 xpc_msgqueue_ref(ch);
821 if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
822 atomic_inc_return(&part->nchannels_engaged) == 1) {
823 xpc_mark_partition_engaged(part);
824 }
825
746 pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0); 826 pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
747 if (pid < 0) { 827 if (pid < 0) {
748 /* the fork failed */ 828 /* the fork failed */
829 if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
830 atomic_dec_return(&part->nchannels_engaged) == 0) {
831 xpc_mark_partition_disengaged(part);
832 xpc_IPI_send_disengage(part);
833 }
834 xpc_msgqueue_deref(ch);
835 xpc_part_deref(part);
749 836
750 if (atomic_read(&ch->kthreads_assigned) < 837 if (atomic_read(&ch->kthreads_assigned) <
751 ch->kthreads_idle_limit) { 838 ch->kthreads_idle_limit) {
@@ -765,14 +852,6 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
765 break; 852 break;
766 } 853 }
767 854
768 /*
769 * The following is done on behalf of the newly created
770 * kthread. That kthread is responsible for doing the
771 * counterpart to the following before it exits.
772 */
773 (void) xpc_part_ref(&xpc_partitions[ch->partid]);
774 xpc_msgqueue_ref(ch);
775 atomic_inc(&ch->kthreads_assigned);
776 ch->kthreads_created++; // >>> temporary debug only!!! 855 ch->kthreads_created++; // >>> temporary debug only!!!
777 } 856 }
778} 857}
@@ -781,87 +860,142 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
781void 860void
782xpc_disconnect_wait(int ch_number) 861xpc_disconnect_wait(int ch_number)
783{ 862{
863 unsigned long irq_flags;
784 partid_t partid; 864 partid_t partid;
785 struct xpc_partition *part; 865 struct xpc_partition *part;
786 struct xpc_channel *ch; 866 struct xpc_channel *ch;
867 int wakeup_channel_mgr;
787 868
788 869
789 /* now wait for all callouts to the caller's function to cease */ 870 /* now wait for all callouts to the caller's function to cease */
790 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 871 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
791 part = &xpc_partitions[partid]; 872 part = &xpc_partitions[partid];
792 873
793 if (xpc_part_ref(part)) { 874 if (!xpc_part_ref(part)) {
794 ch = &part->channels[ch_number]; 875 continue;
876 }
795 877
796// >>> how do we keep from falling into the window between our check and going 878 ch = &part->channels[ch_number];
797// >>> down and coming back up where sema is re-inited?
798 if (ch->flags & XPC_C_SETUP) {
799 (void) down(&ch->teardown_sema);
800 }
801 879
880 if (!(ch->flags & XPC_C_WDISCONNECT)) {
802 xpc_part_deref(part); 881 xpc_part_deref(part);
882 continue;
883 }
884
885 (void) down(&ch->wdisconnect_sema);
886
887 spin_lock_irqsave(&ch->lock, irq_flags);
888 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
889 wakeup_channel_mgr = 0;
890
891 if (ch->delayed_IPI_flags) {
892 if (part->act_state != XPC_P_DEACTIVATING) {
893 spin_lock(&part->IPI_lock);
894 XPC_SET_IPI_FLAGS(part->local_IPI_amo,
895 ch->number, ch->delayed_IPI_flags);
896 spin_unlock(&part->IPI_lock);
897 wakeup_channel_mgr = 1;
898 }
899 ch->delayed_IPI_flags = 0;
803 } 900 }
901
902 ch->flags &= ~XPC_C_WDISCONNECT;
903 spin_unlock_irqrestore(&ch->lock, irq_flags);
904
905 if (wakeup_channel_mgr) {
906 xpc_wakeup_channel_mgr(part);
907 }
908
909 xpc_part_deref(part);
804 } 910 }
805} 911}
806 912
807 913
808static void 914static void
809xpc_do_exit(void) 915xpc_do_exit(enum xpc_retval reason)
810{ 916{
811 partid_t partid; 917 partid_t partid;
812 int active_part_count; 918 int active_part_count;
813 struct xpc_partition *part; 919 struct xpc_partition *part;
920 unsigned long printmsg_time;
814 921
815 922
816 /* now it's time to eliminate our heartbeat */ 923 /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
817 del_timer_sync(&xpc_hb_timer); 924 DBUG_ON(xpc_exiting == 1);
818 xpc_vars->heartbeating_to_mask = 0;
819
820 /* indicate to others that our reserved page is uninitialized */
821 xpc_rsvd_page->vars_pa = 0;
822
823 /*
824 * Ignore all incoming interrupts. Without interupts the heartbeat
825 * checker won't activate any new partitions that may come up.
826 */
827 free_irq(SGI_XPC_ACTIVATE, NULL);
828 925
829 /* 926 /*
830 * Cause the heartbeat checker and the discovery threads to exit. 927 * Let the heartbeat checker thread and the discovery thread
831 * We don't want them attempting to activate new partitions as we 928 * (if one is running) know that they should exit. Also wake up
832 * try to deactivate the existing ones. 929 * the heartbeat checker thread in case it's sleeping.
833 */ 930 */
834 xpc_exiting = 1; 931 xpc_exiting = 1;
835 wake_up_interruptible(&xpc_act_IRQ_wq); 932 wake_up_interruptible(&xpc_act_IRQ_wq);
836 933
837 /* wait for the heartbeat checker thread to mark itself inactive */ 934 /* ignore all incoming interrupts */
838 down(&xpc_hb_checker_exited); 935 free_irq(SGI_XPC_ACTIVATE, NULL);
839 936
840 /* wait for the discovery thread to mark itself inactive */ 937 /* wait for the discovery thread to exit */
841 down(&xpc_discovery_exited); 938 down(&xpc_discovery_exited);
842 939
940 /* wait for the heartbeat checker thread to exit */
941 down(&xpc_hb_checker_exited);
843 942
844 msleep_interruptible(300); 943
944 /* sleep for a 1/3 of a second or so */
945 (void) msleep_interruptible(300);
845 946
846 947
847 /* wait for all partitions to become inactive */ 948 /* wait for all partitions to become inactive */
848 949
950 printmsg_time = jiffies;
951
849 do { 952 do {
850 active_part_count = 0; 953 active_part_count = 0;
851 954
852 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 955 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
853 part = &xpc_partitions[partid]; 956 part = &xpc_partitions[partid];
854 if (part->act_state != XPC_P_INACTIVE) {
855 active_part_count++;
856 957
857 XPC_DEACTIVATE_PARTITION(part, xpcUnloading); 958 if (xpc_partition_disengaged(part) &&
959 part->act_state == XPC_P_INACTIVE) {
960 continue;
858 } 961 }
962
963 active_part_count++;
964
965 XPC_DEACTIVATE_PARTITION(part, reason);
859 } 966 }
860 967
861 if (active_part_count) 968 if (active_part_count == 0) {
862 msleep_interruptible(300); 969 break;
863 } while (active_part_count > 0); 970 }
864 971
972 if (jiffies >= printmsg_time) {
973 dev_info(xpc_part, "waiting for partitions to "
974 "deactivate/disengage, active count=%d, remote "
975 "engaged=0x%lx\n", active_part_count,
976 xpc_partition_engaged(1UL << partid));
977
978 printmsg_time = jiffies +
979 (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
980 }
981
982 /* sleep for a 1/3 of a second or so */
983 (void) msleep_interruptible(300);
984
985 } while (1);
986
987 DBUG_ON(xpc_partition_engaged(-1UL));
988
989
990 /* indicate to others that our reserved page is uninitialized */
991 xpc_rsvd_page->vars_pa = 0;
992
993 /* now it's time to eliminate our heartbeat */
994 del_timer_sync(&xpc_hb_timer);
995 DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
996
997 /* take ourselves off of the reboot_notifier_list */
998 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
865 999
866 /* close down protections for IPI operations */ 1000 /* close down protections for IPI operations */
867 xpc_restrict_IPI_ops(); 1001 xpc_restrict_IPI_ops();
@@ -876,6 +1010,34 @@ xpc_do_exit(void)
876} 1010}
877 1011
878 1012
1013/*
1014 * This function is called when the system is being rebooted.
1015 */
1016static int
1017xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
1018{
1019 enum xpc_retval reason;
1020
1021
1022 switch (event) {
1023 case SYS_RESTART:
1024 reason = xpcSystemReboot;
1025 break;
1026 case SYS_HALT:
1027 reason = xpcSystemHalt;
1028 break;
1029 case SYS_POWER_OFF:
1030 reason = xpcSystemPoweroff;
1031 break;
1032 default:
1033 reason = xpcSystemGoingDown;
1034 }
1035
1036 xpc_do_exit(reason);
1037 return NOTIFY_DONE;
1038}
1039
1040
879int __init 1041int __init
880xpc_init(void) 1042xpc_init(void)
881{ 1043{
@@ -891,11 +1053,11 @@ xpc_init(void)
891 1053
892 /* 1054 /*
893 * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng 1055 * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
894 * both a partition's reserved page and its XPC variables. Its size was 1056 * various portions of a partition's reserved page. Its size is based
895 * based on the size of a reserved page. So we need to ensure that the 1057 * on the size of the reserved page header and part_nasids mask. So we
896 * XPC variables will fit as well. 1058 * need to ensure that the other items will fit as well.
897 */ 1059 */
898 if (XPC_VARS_ALIGNED_SIZE > XPC_RSVD_PAGE_ALIGNED_SIZE) { 1060 if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
899 dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n"); 1061 dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
900 return -EPERM; 1062 return -EPERM;
901 } 1063 }
@@ -924,6 +1086,12 @@ xpc_init(void)
924 spin_lock_init(&part->act_lock); 1086 spin_lock_init(&part->act_lock);
925 part->act_state = XPC_P_INACTIVE; 1087 part->act_state = XPC_P_INACTIVE;
926 XPC_SET_REASON(part, 0, 0); 1088 XPC_SET_REASON(part, 0, 0);
1089
1090 init_timer(&part->disengage_request_timer);
1091 part->disengage_request_timer.function =
1092 xpc_timeout_partition_disengage_request;
1093 part->disengage_request_timer.data = (unsigned long) part;
1094
927 part->setup_state = XPC_P_UNSET; 1095 part->setup_state = XPC_P_UNSET;
928 init_waitqueue_head(&part->teardown_wq); 1096 init_waitqueue_head(&part->teardown_wq);
929 atomic_set(&part->references, 0); 1097 atomic_set(&part->references, 0);
@@ -980,6 +1148,13 @@ xpc_init(void)
980 } 1148 }
981 1149
982 1150
1151 /* add ourselves to the reboot_notifier_list */
1152 ret = register_reboot_notifier(&xpc_reboot_notifier);
1153 if (ret != 0) {
1154 dev_warn(xpc_part, "can't register reboot notifier\n");
1155 }
1156
1157
983 /* 1158 /*
984 * Set the beating to other partitions into motion. This is 1159 * Set the beating to other partitions into motion. This is
985 * the last requirement for other partitions' discovery to 1160 * the last requirement for other partitions' discovery to
@@ -1001,6 +1176,9 @@ xpc_init(void)
1001 /* indicate to others that our reserved page is uninitialized */ 1176 /* indicate to others that our reserved page is uninitialized */
1002 xpc_rsvd_page->vars_pa = 0; 1177 xpc_rsvd_page->vars_pa = 0;
1003 1178
1179 /* take ourselves off of the reboot_notifier_list */
1180 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
1181
1004 del_timer_sync(&xpc_hb_timer); 1182 del_timer_sync(&xpc_hb_timer);
1005 free_irq(SGI_XPC_ACTIVATE, NULL); 1183 free_irq(SGI_XPC_ACTIVATE, NULL);
1006 xpc_restrict_IPI_ops(); 1184 xpc_restrict_IPI_ops();
@@ -1024,7 +1202,7 @@ xpc_init(void)
1024 /* mark this new thread as a non-starter */ 1202 /* mark this new thread as a non-starter */
1025 up(&xpc_discovery_exited); 1203 up(&xpc_discovery_exited);
1026 1204
1027 xpc_do_exit(); 1205 xpc_do_exit(xpcUnloading);
1028 return -EBUSY; 1206 return -EBUSY;
1029 } 1207 }
1030 1208
@@ -1043,7 +1221,7 @@ module_init(xpc_init);
1043void __exit 1221void __exit
1044xpc_exit(void) 1222xpc_exit(void)
1045{ 1223{
1046 xpc_do_exit(); 1224 xpc_do_exit(xpcUnloading);
1047} 1225}
1048module_exit(xpc_exit); 1226module_exit(xpc_exit);
1049 1227
@@ -1060,3 +1238,7 @@ module_param(xpc_hb_check_interval, int, 0);
1060MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between " 1238MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
1061 "heartbeat checks."); 1239 "heartbeat checks.");
1062 1240
1241module_param(xpc_disengage_request_timelimit, int, 0);
1242MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
1243 "for disengage request to complete.");
1244
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 578265ea9e67..581e113d2d37 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -44,16 +44,19 @@ static u64 xpc_sh2_IPI_access3;
44 44
45 45
46/* original protection values for each node */ 46/* original protection values for each node */
47u64 xpc_prot_vec[MAX_COMPACT_NODES]; 47u64 xpc_prot_vec[MAX_NUMNODES];
48 48
49 49
50/* this partition's reserved page */ 50/* this partition's reserved page pointers */
51struct xpc_rsvd_page *xpc_rsvd_page; 51struct xpc_rsvd_page *xpc_rsvd_page;
52 52static u64 *xpc_part_nasids;
53/* this partition's XPC variables (within the reserved page) */ 53static u64 *xpc_mach_nasids;
54struct xpc_vars *xpc_vars; 54struct xpc_vars *xpc_vars;
55struct xpc_vars_part *xpc_vars_part; 55struct xpc_vars_part *xpc_vars_part;
56 56
57static int xp_nasid_mask_bytes; /* actual size in bytes of nasid mask */
58static int xp_nasid_mask_words; /* actual size in words of nasid mask */
59
57 60
58/* 61/*
59 * For performance reasons, each entry of xpc_partitions[] is cacheline 62 * For performance reasons, each entry of xpc_partitions[] is cacheline
@@ -65,20 +68,16 @@ struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
65 68
66 69
67/* 70/*
68 * Generic buffer used to store a local copy of the remote partitions 71 * Generic buffer used to store a local copy of portions of a remote
69 * reserved page or XPC variables. 72 * partition's reserved page (either its header and part_nasids mask,
73 * or its vars).
70 * 74 *
71 * xpc_discovery runs only once and is a seperate thread that is 75 * xpc_discovery runs only once and is a seperate thread that is
72 * very likely going to be processing in parallel with receiving 76 * very likely going to be processing in parallel with receiving
73 * interrupts. 77 * interrupts.
74 */ 78 */
75char ____cacheline_aligned 79char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
76 xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE]; 80 XP_NASID_MASK_BYTES];
77
78
79/* systune related variables */
80int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
81int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
82 81
83 82
84/* 83/*
@@ -86,13 +85,16 @@ int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
86 * for that nasid. This function returns 0 on any error. 85 * for that nasid. This function returns 0 on any error.
87 */ 86 */
88static u64 87static u64
89xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size) 88xpc_get_rsvd_page_pa(int nasid)
90{ 89{
91 bte_result_t bte_res; 90 bte_result_t bte_res;
92 s64 status; 91 s64 status;
93 u64 cookie = 0; 92 u64 cookie = 0;
94 u64 rp_pa = nasid; /* seed with nasid */ 93 u64 rp_pa = nasid; /* seed with nasid */
95 u64 len = 0; 94 u64 len = 0;
95 u64 buf = buf;
96 u64 buf_len = 0;
97 void *buf_base = NULL;
96 98
97 99
98 while (1) { 100 while (1) {
@@ -108,13 +110,22 @@ xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
108 break; 110 break;
109 } 111 }
110 112
111 if (len > buf_size) { 113 if (L1_CACHE_ALIGN(len) > buf_len) {
112 dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len); 114 if (buf_base != NULL) {
113 status = SALRET_ERROR; 115 kfree(buf_base);
114 break; 116 }
117 buf_len = L1_CACHE_ALIGN(len);
118 buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
119 GFP_KERNEL, &buf_base);
120 if (buf_base == NULL) {
121 dev_err(xpc_part, "unable to kmalloc "
122 "len=0x%016lx\n", buf_len);
123 status = SALRET_ERROR;
124 break;
125 }
115 } 126 }
116 127
117 bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size, 128 bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_len,
118 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 129 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
119 if (bte_res != BTE_SUCCESS) { 130 if (bte_res != BTE_SUCCESS) {
120 dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res); 131 dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
@@ -123,6 +134,10 @@ xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
123 } 134 }
124 } 135 }
125 136
137 if (buf_base != NULL) {
138 kfree(buf_base);
139 }
140
126 if (status != SALRET_OK) { 141 if (status != SALRET_OK) {
127 rp_pa = 0; 142 rp_pa = 0;
128 } 143 }
@@ -141,15 +156,15 @@ xpc_rsvd_page_init(void)
141{ 156{
142 struct xpc_rsvd_page *rp; 157 struct xpc_rsvd_page *rp;
143 AMO_t *amos_page; 158 AMO_t *amos_page;
144 u64 rp_pa, next_cl, nasid_array = 0; 159 u64 rp_pa, nasid_array = 0;
145 int i, ret; 160 int i, ret;
146 161
147 162
148 /* get the local reserved page's address */ 163 /* get the local reserved page's address */
149 164
150 rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0), 165 preempt_disable();
151 (u64) xpc_remote_copy_buffer, 166 rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
152 XPC_RSVD_PAGE_ALIGNED_SIZE); 167 preempt_enable();
153 if (rp_pa == 0) { 168 if (rp_pa == 0) {
154 dev_err(xpc_part, "SAL failed to locate the reserved page\n"); 169 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
155 return NULL; 170 return NULL;
@@ -164,12 +179,19 @@ xpc_rsvd_page_init(void)
164 179
165 rp->version = XPC_RP_VERSION; 180 rp->version = XPC_RP_VERSION;
166 181
167 /* 182 /* establish the actual sizes of the nasid masks */
168 * Place the XPC variables on the cache line following the 183 if (rp->SAL_version == 1) {
169 * reserved page structure. 184 /* SAL_version 1 didn't set the nasids_size field */
170 */ 185 rp->nasids_size = 128;
171 next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE; 186 }
172 xpc_vars = (struct xpc_vars *) next_cl; 187 xp_nasid_mask_bytes = rp->nasids_size;
188 xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
189
190 /* setup the pointers to the various items in the reserved page */
191 xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
192 xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
193 xpc_vars = XPC_RP_VARS(rp);
194 xpc_vars_part = XPC_RP_VARS_PART(rp);
173 195
174 /* 196 /*
175 * Before clearing xpc_vars, see if a page of AMOs had been previously 197 * Before clearing xpc_vars, see if a page of AMOs had been previously
@@ -221,33 +243,32 @@ xpc_rsvd_page_init(void)
221 amos_page = (AMO_t *) TO_AMO((u64) amos_page); 243 amos_page = (AMO_t *) TO_AMO((u64) amos_page);
222 } 244 }
223 245
246 /* clear xpc_vars */
224 memset(xpc_vars, 0, sizeof(struct xpc_vars)); 247 memset(xpc_vars, 0, sizeof(struct xpc_vars));
225 248
226 /*
227 * Place the XPC per partition specific variables on the cache line
228 * following the XPC variables structure.
229 */
230 next_cl += XPC_VARS_ALIGNED_SIZE;
231 memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
232 XP_MAX_PARTITIONS);
233 xpc_vars_part = (struct xpc_vars_part *) next_cl;
234 xpc_vars->vars_part_pa = __pa(next_cl);
235
236 xpc_vars->version = XPC_V_VERSION; 249 xpc_vars->version = XPC_V_VERSION;
237 xpc_vars->act_nasid = cpuid_to_nasid(0); 250 xpc_vars->act_nasid = cpuid_to_nasid(0);
238 xpc_vars->act_phys_cpuid = cpu_physical_id(0); 251 xpc_vars->act_phys_cpuid = cpu_physical_id(0);
252 xpc_vars->vars_part_pa = __pa(xpc_vars_part);
253 xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
239 xpc_vars->amos_page = amos_page; /* save for next load of XPC */ 254 xpc_vars->amos_page = amos_page; /* save for next load of XPC */
240 255
241 256
242 /* 257 /* clear xpc_vars_part */
243 * Initialize the activation related AMO variables. 258 memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
244 */ 259 XP_MAX_PARTITIONS);
245 xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS); 260
246 for (i = 1; i < XP_NASID_MASK_WORDS; i++) { 261 /* initialize the activate IRQ related AMO variables */
247 xpc_IPI_init(i + XP_MAX_PARTITIONS); 262 for (i = 0; i < xp_nasid_mask_words; i++) {
263 (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
248 } 264 }
249 /* export AMO page's physical address to other partitions */ 265
250 xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page); 266 /* initialize the engaged remote partitions related AMO variables */
267 (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
268 (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
269
270 /* timestamp of when reserved page was setup by XPC */
271 rp->stamp = CURRENT_TIME;
251 272
252 /* 273 /*
253 * This signifies to the remote partition that our reserved 274 * This signifies to the remote partition that our reserved
@@ -387,6 +408,11 @@ xpc_check_remote_hb(void)
387 remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; 408 remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
388 409
389 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 410 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
411
412 if (xpc_exiting) {
413 break;
414 }
415
390 if (partid == sn_partition_id) { 416 if (partid == sn_partition_id) {
391 continue; 417 continue;
392 } 418 }
@@ -401,7 +427,7 @@ xpc_check_remote_hb(void)
401 /* pull the remote_hb cache line */ 427 /* pull the remote_hb cache line */
402 bres = xp_bte_copy(part->remote_vars_pa, 428 bres = xp_bte_copy(part->remote_vars_pa,
403 ia64_tpa((u64) remote_vars), 429 ia64_tpa((u64) remote_vars),
404 XPC_VARS_ALIGNED_SIZE, 430 XPC_RP_VARS_SIZE,
405 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 431 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
406 if (bres != BTE_SUCCESS) { 432 if (bres != BTE_SUCCESS) {
407 XPC_DEACTIVATE_PARTITION(part, 433 XPC_DEACTIVATE_PARTITION(part,
@@ -417,7 +443,7 @@ xpc_check_remote_hb(void)
417 443
418 if (((remote_vars->heartbeat == part->last_heartbeat) && 444 if (((remote_vars->heartbeat == part->last_heartbeat) &&
419 (remote_vars->kdb_status == 0)) || 445 (remote_vars->kdb_status == 0)) ||
420 !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { 446 !xpc_hb_allowed(sn_partition_id, remote_vars)) {
421 447
422 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); 448 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
423 continue; 449 continue;
@@ -429,31 +455,31 @@ xpc_check_remote_hb(void)
429 455
430 456
431/* 457/*
432 * Get a copy of the remote partition's rsvd page. 458 * Get a copy of a portion of the remote partition's rsvd page.
433 * 459 *
434 * remote_rp points to a buffer that is cacheline aligned for BTE copies and 460 * remote_rp points to a buffer that is cacheline aligned for BTE copies and
435 * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE. 461 * is large enough to contain a copy of their reserved page header and
462 * part_nasids mask.
436 */ 463 */
437static enum xpc_retval 464static enum xpc_retval
438xpc_get_remote_rp(int nasid, u64 *discovered_nasids, 465xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
439 struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa) 466 struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
440{ 467{
441 int bres, i; 468 int bres, i;
442 469
443 470
444 /* get the reserved page's physical address */ 471 /* get the reserved page's physical address */
445 472
446 *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, 473 *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
447 XPC_RSVD_PAGE_ALIGNED_SIZE); 474 if (*remote_rp_pa == 0) {
448 if (*remote_rsvd_page_pa == 0) {
449 return xpcNoRsvdPageAddr; 475 return xpcNoRsvdPageAddr;
450 } 476 }
451 477
452 478
453 /* pull over the reserved page structure */ 479 /* pull over the reserved page header and part_nasids mask */
454 480
455 bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp), 481 bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
456 XPC_RSVD_PAGE_ALIGNED_SIZE, 482 XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
457 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 483 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
458 if (bres != BTE_SUCCESS) { 484 if (bres != BTE_SUCCESS) {
459 return xpc_map_bte_errors(bres); 485 return xpc_map_bte_errors(bres);
@@ -461,8 +487,11 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
461 487
462 488
463 if (discovered_nasids != NULL) { 489 if (discovered_nasids != NULL) {
464 for (i = 0; i < XP_NASID_MASK_WORDS; i++) { 490 u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
465 discovered_nasids[i] |= remote_rp->part_nasids[i]; 491
492
493 for (i = 0; i < xp_nasid_mask_words; i++) {
494 discovered_nasids[i] |= remote_part_nasids[i];
466 } 495 }
467 } 496 }
468 497
@@ -489,10 +518,10 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
489 518
490 519
491/* 520/*
492 * Get a copy of the remote partition's XPC variables. 521 * Get a copy of the remote partition's XPC variables from the reserved page.
493 * 522 *
494 * remote_vars points to a buffer that is cacheline aligned for BTE copies and 523 * remote_vars points to a buffer that is cacheline aligned for BTE copies and
495 * assumed to be of size XPC_VARS_ALIGNED_SIZE. 524 * assumed to be of size XPC_RP_VARS_SIZE.
496 */ 525 */
497static enum xpc_retval 526static enum xpc_retval
498xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars) 527xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
@@ -508,7 +537,7 @@ xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
508 /* pull over the cross partition variables */ 537 /* pull over the cross partition variables */
509 538
510 bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars), 539 bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
511 XPC_VARS_ALIGNED_SIZE, 540 XPC_RP_VARS_SIZE,
512 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 541 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
513 if (bres != BTE_SUCCESS) { 542 if (bres != BTE_SUCCESS) {
514 return xpc_map_bte_errors(bres); 543 return xpc_map_bte_errors(bres);
@@ -524,7 +553,56 @@ xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
524 553
525 554
526/* 555/*
527 * Prior code has determine the nasid which generated an IPI. Inspect 556 * Update the remote partition's info.
557 */
558static void
559xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
560 struct timespec *remote_rp_stamp, u64 remote_rp_pa,
561 u64 remote_vars_pa, struct xpc_vars *remote_vars)
562{
563 part->remote_rp_version = remote_rp_version;
564 dev_dbg(xpc_part, " remote_rp_version = 0x%016lx\n",
565 part->remote_rp_version);
566
567 part->remote_rp_stamp = *remote_rp_stamp;
568 dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
569 part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
570
571 part->remote_rp_pa = remote_rp_pa;
572 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
573
574 part->remote_vars_pa = remote_vars_pa;
575 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
576 part->remote_vars_pa);
577
578 part->last_heartbeat = remote_vars->heartbeat;
579 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
580 part->last_heartbeat);
581
582 part->remote_vars_part_pa = remote_vars->vars_part_pa;
583 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
584 part->remote_vars_part_pa);
585
586 part->remote_act_nasid = remote_vars->act_nasid;
587 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
588 part->remote_act_nasid);
589
590 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
591 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
592 part->remote_act_phys_cpuid);
593
594 part->remote_amos_page_pa = remote_vars->amos_page_pa;
595 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
596 part->remote_amos_page_pa);
597
598 part->remote_vars_version = remote_vars->version;
599 dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
600 part->remote_vars_version);
601}
602
603
604/*
605 * Prior code has determined the nasid which generated an IPI. Inspect
528 * that nasid to determine if its partition needs to be activated or 606 * that nasid to determine if its partition needs to be activated or
529 * deactivated. 607 * deactivated.
530 * 608 *
@@ -542,8 +620,12 @@ xpc_identify_act_IRQ_req(int nasid)
542{ 620{
543 struct xpc_rsvd_page *remote_rp; 621 struct xpc_rsvd_page *remote_rp;
544 struct xpc_vars *remote_vars; 622 struct xpc_vars *remote_vars;
545 u64 remote_rsvd_page_pa; 623 u64 remote_rp_pa;
546 u64 remote_vars_pa; 624 u64 remote_vars_pa;
625 int remote_rp_version;
626 int reactivate = 0;
627 int stamp_diff;
628 struct timespec remote_rp_stamp = { 0, 0 };
547 partid_t partid; 629 partid_t partid;
548 struct xpc_partition *part; 630 struct xpc_partition *part;
549 enum xpc_retval ret; 631 enum xpc_retval ret;
@@ -553,7 +635,7 @@ xpc_identify_act_IRQ_req(int nasid)
553 635
554 remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; 636 remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
555 637
556 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa); 638 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
557 if (ret != xpcSuccess) { 639 if (ret != xpcSuccess) {
558 dev_warn(xpc_part, "unable to get reserved page from nasid %d, " 640 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
559 "which sent interrupt, reason=%d\n", nasid, ret); 641 "which sent interrupt, reason=%d\n", nasid, ret);
@@ -561,6 +643,10 @@ xpc_identify_act_IRQ_req(int nasid)
561 } 643 }
562 644
563 remote_vars_pa = remote_rp->vars_pa; 645 remote_vars_pa = remote_rp->vars_pa;
646 remote_rp_version = remote_rp->version;
647 if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
648 remote_rp_stamp = remote_rp->stamp;
649 }
564 partid = remote_rp->partid; 650 partid = remote_rp->partid;
565 part = &xpc_partitions[partid]; 651 part = &xpc_partitions[partid];
566 652
@@ -586,44 +672,117 @@ xpc_identify_act_IRQ_req(int nasid)
586 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, 672 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
587 remote_vars->heartbeat, remote_vars->heartbeating_to_mask); 673 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
588 674
675 if (xpc_partition_disengaged(part) &&
676 part->act_state == XPC_P_INACTIVE) {
589 677
590 if (part->act_state == XPC_P_INACTIVE) { 678 xpc_update_partition_info(part, remote_rp_version,
679 &remote_rp_stamp, remote_rp_pa,
680 remote_vars_pa, remote_vars);
591 681
592 part->remote_rp_pa = remote_rsvd_page_pa; 682 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
593 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", 683 if (xpc_partition_disengage_requested(1UL << partid)) {
594 part->remote_rp_pa); 684 /*
685 * Other side is waiting on us to disengage,
686 * even though we already have.
687 */
688 return;
689 }
690 } else {
691 /* other side doesn't support disengage requests */
692 xpc_clear_partition_disengage_request(1UL << partid);
693 }
595 694
596 part->remote_vars_pa = remote_vars_pa; 695 xpc_activate_partition(part);
597 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", 696 return;
598 part->remote_vars_pa); 697 }
599 698
600 part->last_heartbeat = remote_vars->heartbeat; 699 DBUG_ON(part->remote_rp_version == 0);
601 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", 700 DBUG_ON(part->remote_vars_version == 0);
602 part->last_heartbeat); 701
702 if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
703 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
704 remote_vars_version));
705
706 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
707 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
708 version));
709 /* see if the other side rebooted */
710 if (part->remote_amos_page_pa ==
711 remote_vars->amos_page_pa &&
712 xpc_hb_allowed(sn_partition_id,
713 remote_vars)) {
714 /* doesn't look that way, so ignore the IPI */
715 return;
716 }
717 }
603 718
604 part->remote_vars_part_pa = remote_vars->vars_part_pa; 719 /*
605 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", 720 * Other side rebooted and previous XPC didn't support the
606 part->remote_vars_part_pa); 721 * disengage request, so we don't need to do anything special.
722 */
607 723
608 part->remote_act_nasid = remote_vars->act_nasid; 724 xpc_update_partition_info(part, remote_rp_version,
609 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", 725 &remote_rp_stamp, remote_rp_pa,
610 part->remote_act_nasid); 726 remote_vars_pa, remote_vars);
727 part->reactivate_nasid = nasid;
728 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
729 return;
730 }
611 731
612 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; 732 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
613 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
614 part->remote_act_phys_cpuid);
615 733
616 part->remote_amos_page_pa = remote_vars->amos_page_pa; 734 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
617 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", 735 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
618 part->remote_amos_page_pa);
619 736
620 xpc_activate_partition(part); 737 /*
738 * Other side rebooted and previous XPC did support the
739 * disengage request, but the new one doesn't.
740 */
741
742 xpc_clear_partition_engaged(1UL << partid);
743 xpc_clear_partition_disengage_request(1UL << partid);
621 744
622 } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa || 745 xpc_update_partition_info(part, remote_rp_version,
623 !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { 746 &remote_rp_stamp, remote_rp_pa,
747 remote_vars_pa, remote_vars);
748 reactivate = 1;
749
750 } else {
751 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
624 752
753 stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
754 &remote_rp_stamp);
755 if (stamp_diff != 0) {
756 DBUG_ON(stamp_diff >= 0);
757
758 /*
759 * Other side rebooted and the previous XPC did support
760 * the disengage request, as does the new one.
761 */
762
763 DBUG_ON(xpc_partition_engaged(1UL << partid));
764 DBUG_ON(xpc_partition_disengage_requested(1UL <<
765 partid));
766
767 xpc_update_partition_info(part, remote_rp_version,
768 &remote_rp_stamp, remote_rp_pa,
769 remote_vars_pa, remote_vars);
770 reactivate = 1;
771 }
772 }
773
774 if (!xpc_partition_disengaged(part)) {
775 /* still waiting on other side to disengage from us */
776 return;
777 }
778
779 if (reactivate) {
625 part->reactivate_nasid = nasid; 780 part->reactivate_nasid = nasid;
626 XPC_DEACTIVATE_PARTITION(part, xpcReactivating); 781 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
782
783 } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
784 xpc_partition_disengage_requested(1UL << partid)) {
785 XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
627 } 786 }
628} 787}
629 788
@@ -643,14 +802,17 @@ xpc_identify_act_IRQ_sender(void)
643 u64 nasid; /* remote nasid */ 802 u64 nasid; /* remote nasid */
644 int n_IRQs_detected = 0; 803 int n_IRQs_detected = 0;
645 AMO_t *act_amos; 804 AMO_t *act_amos;
646 struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
647 805
648 806
649 act_amos = xpc_vars->act_amos; 807 act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
650 808
651 809
652 /* scan through act AMO variable looking for non-zero entries */ 810 /* scan through act AMO variable looking for non-zero entries */
653 for (word = 0; word < XP_NASID_MASK_WORDS; word++) { 811 for (word = 0; word < xp_nasid_mask_words; word++) {
812
813 if (xpc_exiting) {
814 break;
815 }
654 816
655 nasid_mask = xpc_IPI_receive(&act_amos[word]); 817 nasid_mask = xpc_IPI_receive(&act_amos[word]);
656 if (nasid_mask == 0) { 818 if (nasid_mask == 0) {
@@ -668,7 +830,7 @@ xpc_identify_act_IRQ_sender(void)
668 * remote nasid in our reserved pages machine mask. 830 * remote nasid in our reserved pages machine mask.
669 * This is used in the event of module reload. 831 * This is used in the event of module reload.
670 */ 832 */
671 rp->mach_nasids[word] |= nasid_mask; 833 xpc_mach_nasids[word] |= nasid_mask;
672 834
673 835
674 /* locate the nasid(s) which sent interrupts */ 836 /* locate the nasid(s) which sent interrupts */
@@ -688,6 +850,55 @@ xpc_identify_act_IRQ_sender(void)
688 850
689 851
690/* 852/*
853 * See if the other side has responded to a partition disengage request
854 * from us.
855 */
856int
857xpc_partition_disengaged(struct xpc_partition *part)
858{
859 partid_t partid = XPC_PARTID(part);
860 int disengaged;
861
862
863 disengaged = (xpc_partition_engaged(1UL << partid) == 0);
864 if (part->disengage_request_timeout) {
865 if (!disengaged) {
866 if (jiffies < part->disengage_request_timeout) {
867 /* timelimit hasn't been reached yet */
868 return 0;
869 }
870
871 /*
872 * Other side hasn't responded to our disengage
873 * request in a timely fashion, so assume it's dead.
874 */
875
876 xpc_clear_partition_engaged(1UL << partid);
877 disengaged = 1;
878 }
879 part->disengage_request_timeout = 0;
880
881 /* cancel the timer function, provided it's not us */
882 if (!in_interrupt()) {
883 del_singleshot_timer_sync(&part->
884 disengage_request_timer);
885 }
886
887 DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
888 part->act_state != XPC_P_INACTIVE);
889 if (part->act_state != XPC_P_INACTIVE) {
890 xpc_wakeup_channel_mgr(part);
891 }
892
893 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
894 xpc_cancel_partition_disengage_request(part);
895 }
896 }
897 return disengaged;
898}
899
900
901/*
691 * Mark specified partition as active. 902 * Mark specified partition as active.
692 */ 903 */
693enum xpc_retval 904enum xpc_retval
@@ -721,7 +932,6 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
721 enum xpc_retval reason) 932 enum xpc_retval reason)
722{ 933{
723 unsigned long irq_flags; 934 unsigned long irq_flags;
724 partid_t partid = XPC_PARTID(part);
725 935
726 936
727 spin_lock_irqsave(&part->act_lock, irq_flags); 937 spin_lock_irqsave(&part->act_lock, irq_flags);
@@ -749,17 +959,27 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
749 959
750 spin_unlock_irqrestore(&part->act_lock, irq_flags); 960 spin_unlock_irqrestore(&part->act_lock, irq_flags);
751 961
752 XPC_DISALLOW_HB(partid, xpc_vars); 962 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
963 xpc_request_partition_disengage(part);
964 xpc_IPI_send_disengage(part);
753 965
754 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid, 966 /* set a timelimit on the disengage request */
755 reason); 967 part->disengage_request_timeout = jiffies +
968 (xpc_disengage_request_timelimit * HZ);
969 part->disengage_request_timer.expires =
970 part->disengage_request_timeout;
971 add_timer(&part->disengage_request_timer);
972 }
973
974 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
975 XPC_PARTID(part), reason);
756 976
757 xpc_partition_down(part, reason); 977 xpc_partition_going_down(part, reason);
758} 978}
759 979
760 980
761/* 981/*
762 * Mark specified partition as active. 982 * Mark specified partition as inactive.
763 */ 983 */
764void 984void
765xpc_mark_partition_inactive(struct xpc_partition *part) 985xpc_mark_partition_inactive(struct xpc_partition *part)
@@ -792,9 +1012,10 @@ xpc_discovery(void)
792 void *remote_rp_base; 1012 void *remote_rp_base;
793 struct xpc_rsvd_page *remote_rp; 1013 struct xpc_rsvd_page *remote_rp;
794 struct xpc_vars *remote_vars; 1014 struct xpc_vars *remote_vars;
795 u64 remote_rsvd_page_pa; 1015 u64 remote_rp_pa;
796 u64 remote_vars_pa; 1016 u64 remote_vars_pa;
797 int region; 1017 int region;
1018 int region_size;
798 int max_regions; 1019 int max_regions;
799 int nasid; 1020 int nasid;
800 struct xpc_rsvd_page *rp; 1021 struct xpc_rsvd_page *rp;
@@ -804,7 +1025,8 @@ xpc_discovery(void)
804 enum xpc_retval ret; 1025 enum xpc_retval ret;
805 1026
806 1027
807 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE, 1028 remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
1029 xp_nasid_mask_bytes,
808 GFP_KERNEL, &remote_rp_base); 1030 GFP_KERNEL, &remote_rp_base);
809 if (remote_rp == NULL) { 1031 if (remote_rp == NULL) {
810 return; 1032 return;
@@ -812,13 +1034,13 @@ xpc_discovery(void)
812 remote_vars = (struct xpc_vars *) remote_rp; 1034 remote_vars = (struct xpc_vars *) remote_rp;
813 1035
814 1036
815 discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS, 1037 discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words,
816 GFP_KERNEL); 1038 GFP_KERNEL);
817 if (discovered_nasids == NULL) { 1039 if (discovered_nasids == NULL) {
818 kfree(remote_rp_base); 1040 kfree(remote_rp_base);
819 return; 1041 return;
820 } 1042 }
821 memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS); 1043 memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words);
822 1044
823 rp = (struct xpc_rsvd_page *) xpc_rsvd_page; 1045 rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
824 1046
@@ -827,11 +1049,19 @@ xpc_discovery(void)
827 * nodes that can comprise an access protection grouping. The access 1049 * nodes that can comprise an access protection grouping. The access
828 * protection is in regards to memory, IOI and IPI. 1050 * protection is in regards to memory, IOI and IPI.
829 */ 1051 */
830//>>> move the next two #defines into either include/asm-ia64/sn/arch.h or 1052 max_regions = 64;
831//>>> include/asm-ia64/sn/addrs.h 1053 region_size = sn_region_size;
832#define SH1_MAX_REGIONS 64 1054
833#define SH2_MAX_REGIONS 256 1055 switch (region_size) {
834 max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS; 1056 case 128:
1057 max_regions *= 2;
1058 case 64:
1059 max_regions *= 2;
1060 case 32:
1061 max_regions *= 2;
1062 region_size = 16;
1063 DBUG_ON(!is_shub2());
1064 }
835 1065
836 for (region = 0; region < max_regions; region++) { 1066 for (region = 0; region < max_regions; region++) {
837 1067
@@ -841,8 +1071,8 @@ xpc_discovery(void)
841 1071
842 dev_dbg(xpc_part, "searching region %d\n", region); 1072 dev_dbg(xpc_part, "searching region %d\n", region);
843 1073
844 for (nasid = (region * sn_region_size * 2); 1074 for (nasid = (region * region_size * 2);
845 nasid < ((region + 1) * sn_region_size * 2); 1075 nasid < ((region + 1) * region_size * 2);
846 nasid += 2) { 1076 nasid += 2) {
847 1077
848 if ((volatile int) xpc_exiting) { 1078 if ((volatile int) xpc_exiting) {
@@ -852,14 +1082,14 @@ xpc_discovery(void)
852 dev_dbg(xpc_part, "checking nasid %d\n", nasid); 1082 dev_dbg(xpc_part, "checking nasid %d\n", nasid);
853 1083
854 1084
855 if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) { 1085 if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
856 dev_dbg(xpc_part, "PROM indicates Nasid %d is " 1086 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
857 "part of the local partition; skipping " 1087 "part of the local partition; skipping "
858 "region\n", nasid); 1088 "region\n", nasid);
859 break; 1089 break;
860 } 1090 }
861 1091
862 if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) { 1092 if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
863 dev_dbg(xpc_part, "PROM indicates Nasid %d was " 1093 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
864 "not on Numa-Link network at reset\n", 1094 "not on Numa-Link network at reset\n",
865 nasid); 1095 nasid);
@@ -877,7 +1107,7 @@ xpc_discovery(void)
877 /* pull over the reserved page structure */ 1107 /* pull over the reserved page structure */
878 1108
879 ret = xpc_get_remote_rp(nasid, discovered_nasids, 1109 ret = xpc_get_remote_rp(nasid, discovered_nasids,
880 remote_rp, &remote_rsvd_page_pa); 1110 remote_rp, &remote_rp_pa);
881 if (ret != xpcSuccess) { 1111 if (ret != xpcSuccess) {
882 dev_dbg(xpc_part, "unable to get reserved page " 1112 dev_dbg(xpc_part, "unable to get reserved page "
883 "from nasid %d, reason=%d\n", nasid, 1113 "from nasid %d, reason=%d\n", nasid,
@@ -948,6 +1178,13 @@ xpc_discovery(void)
948 remote_vars->act_nasid, 1178 remote_vars->act_nasid,
949 remote_vars->act_phys_cpuid); 1179 remote_vars->act_phys_cpuid);
950 1180
1181 if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1182 version)) {
1183 part->remote_amos_page_pa =
1184 remote_vars->amos_page_pa;
1185 xpc_mark_partition_disengaged(part);
1186 xpc_cancel_partition_disengage_request(part);
1187 }
951 xpc_IPI_send_activate(remote_vars); 1188 xpc_IPI_send_activate(remote_vars);
952 } 1189 }
953 } 1190 }
@@ -974,12 +1211,12 @@ xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
974 return xpcPartitionDown; 1211 return xpcPartitionDown;
975 } 1212 }
976 1213
977 part_nasid_pa = part->remote_rp_pa + 1214 memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
978 (u64) &((struct xpc_rsvd_page *) 0)->part_nasids; 1215
1216 part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
979 1217
980 bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask), 1218 bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
981 L1_CACHE_ALIGN(XP_NASID_MASK_BYTES), 1219 xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
982 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
983 1220
984 return xpc_map_bte_errors(bte_res); 1221 return xpc_map_bte_errors(bte_res);
985} 1222}