aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/hp/sim/simserial.c10
-rw-r--r--arch/ia64/kernel/fsys.S1
-rw-r--r--arch/ia64/kernel/jprobes.S27
-rw-r--r--arch/ia64/kernel/kprobes.c57
-rw-r--r--arch/ia64/kernel/mca_asm.S2
-rw-r--r--arch/ia64/kernel/salinfo.c170
-rw-r--r--arch/ia64/kernel/traps.c26
-rw-r--r--arch/ia64/mm/tlb.c2
-rw-r--r--arch/ia64/sn/include/xtalk/hubdev.h16
-rw-r--r--arch/ia64/sn/kernel/bte_error.c58
-rw-r--r--arch/ia64/sn/kernel/huberror.c9
-rw-r--r--arch/ia64/sn/kernel/io_init.c94
-rw-r--r--arch/ia64/sn/kernel/xpc.h1273
-rw-r--r--arch/ia64/sn/kernel/xpc_channel.c24
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c189
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c10
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_dma.c34
-rw-r--r--arch/ia64/sn/pci/pcibr/pcibr_provider.c20
18 files changed, 523 insertions, 1499 deletions
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
index a346e1833bf2..27f23fa5ca15 100644
--- a/arch/ia64/hp/sim/simserial.c
+++ b/arch/ia64/hp/sim/simserial.c
@@ -167,15 +167,9 @@ static void receive_chars(struct tty_struct *tty, struct pt_regs *regs)
167 } 167 }
168 } 168 }
169 seen_esc = 0; 169 seen_esc = 0;
170 if (tty->flip.count >= TTY_FLIPBUF_SIZE) break;
171 170
172 *tty->flip.char_buf_ptr = ch; 171 if (tty_insert_flip_char(tty, ch, TTY_NORMAL) == 0)
173 172 break;
174 *tty->flip.flag_buf_ptr = 0;
175
176 tty->flip.flag_buf_ptr++;
177 tty->flip.char_buf_ptr++;
178 tty->flip.count++;
179 } 173 }
180 tty_flip_buffer_push(tty); 174 tty_flip_buffer_push(tty);
181} 175}
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 2ddbac6f4999..ce423910ca97 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -903,5 +903,6 @@ fsyscall_table:
903 data8 0 903 data8 0
904 data8 0 904 data8 0
905 data8 0 905 data8 0
906 data8 0 // 1280
906 907
907 .org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls 908 .org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S
index 2323377e3695..5cd6226f44f2 100644
--- a/arch/ia64/kernel/jprobes.S
+++ b/arch/ia64/kernel/jprobes.S
@@ -60,3 +60,30 @@ END(jprobe_break)
60GLOBAL_ENTRY(jprobe_inst_return) 60GLOBAL_ENTRY(jprobe_inst_return)
61 br.call.sptk.many b0=jprobe_break 61 br.call.sptk.many b0=jprobe_break
62END(jprobe_inst_return) 62END(jprobe_inst_return)
63
64GLOBAL_ENTRY(invalidate_stacked_regs)
65 movl r16=invalidate_restore_cfm
66 ;;
67 mov b6=r16
68 ;;
69 br.ret.sptk.many b6
70 ;;
71invalidate_restore_cfm:
72 mov r16=ar.rsc
73 ;;
74 mov ar.rsc=r0
75 ;;
76 loadrs
77 ;;
78 mov ar.rsc=r16
79 ;;
80 br.cond.sptk.many rp
81END(invalidate_stacked_regs)
82
83GLOBAL_ENTRY(flush_register_stack)
84 // flush dirty regs to backing store (must be first in insn group)
85 flushrs
86 ;;
87 br.ret.sptk.many rp
88END(flush_register_stack)
89
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 346fedf9ea47..50ae8c7d453d 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -766,11 +766,56 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
766 return ret; 766 return ret;
767} 767}
768 768
769struct param_bsp_cfm {
770 unsigned long ip;
771 unsigned long *bsp;
772 unsigned long cfm;
773};
774
775static void ia64_get_bsp_cfm(struct unw_frame_info *info, void *arg)
776{
777 unsigned long ip;
778 struct param_bsp_cfm *lp = arg;
779
780 do {
781 unw_get_ip(info, &ip);
782 if (ip == 0)
783 break;
784 if (ip == lp->ip) {
785 unw_get_bsp(info, (unsigned long*)&lp->bsp);
786 unw_get_cfm(info, (unsigned long*)&lp->cfm);
787 return;
788 }
789 } while (unw_unwind(info) >= 0);
790 lp->bsp = 0;
791 lp->cfm = 0;
792 return;
793}
794
769int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) 795int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
770{ 796{
771 struct jprobe *jp = container_of(p, struct jprobe, kp); 797 struct jprobe *jp = container_of(p, struct jprobe, kp);
772 unsigned long addr = ((struct fnptr *)(jp->entry))->ip; 798 unsigned long addr = ((struct fnptr *)(jp->entry))->ip;
773 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 799 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
800 struct param_bsp_cfm pa;
801 int bytes;
802
803 /*
804 * Callee owns the argument space and could overwrite it, eg
805 * tail call optimization. So to be absolutely safe
806 * we save the argument space before transfering the control
807 * to instrumented jprobe function which runs in
808 * the process context
809 */
810 pa.ip = regs->cr_iip;
811 unw_init_running(ia64_get_bsp_cfm, &pa);
812 bytes = (char *)ia64_rse_skip_regs(pa.bsp, pa.cfm & 0x3f)
813 - (char *)pa.bsp;
814 memcpy( kcb->jprobes_saved_stacked_regs,
815 pa.bsp,
816 bytes );
817 kcb->bsp = pa.bsp;
818 kcb->cfm = pa.cfm;
774 819
775 /* save architectural state */ 820 /* save architectural state */
776 kcb->jprobe_saved_regs = *regs; 821 kcb->jprobe_saved_regs = *regs;
@@ -792,8 +837,20 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
792int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 837int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
793{ 838{
794 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 839 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
840 int bytes;
795 841
842 /* restoring architectural state */
796 *regs = kcb->jprobe_saved_regs; 843 *regs = kcb->jprobe_saved_regs;
844
845 /* restoring the original argument space */
846 flush_register_stack();
847 bytes = (char *)ia64_rse_skip_regs(kcb->bsp, kcb->cfm & 0x3f)
848 - (char *)kcb->bsp;
849 memcpy( kcb->bsp,
850 kcb->jprobes_saved_stacked_regs,
851 bytes );
852 invalidate_stacked_regs();
853
797 preempt_enable_no_resched(); 854 preempt_enable_no_resched();
798 return 1; 855 return 1;
799} 856}
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index db32fc1d3935..403a80a58c13 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -847,7 +847,7 @@ ia64_state_restore:
847 ;; 847 ;;
848 mov cr.iim=temp3 848 mov cr.iim=temp3
849 mov cr.iha=temp4 849 mov cr.iha=temp4
850 dep r22=0,r22,62,2 // pal_min_state, physical, uncached 850 dep r22=0,r22,62,1 // pal_min_state, physical, uncached
851 mov IA64_KR(CURRENT)=r21 851 mov IA64_KR(CURRENT)=r21
852 ld8 r8=[temp1] // os_status 852 ld8 r8=[temp1] // os_status
853 ld8 r10=[temp2] // context 853 ld8 r10=[temp2] // context
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index a87a162a3086..9d5a823479a3 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Creates entries in /proc/sal for various system features. 4 * Creates entries in /proc/sal for various system features.
5 * 5 *
6 * Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved. 6 * Copyright (c) 2003, 2006 Silicon Graphics, Inc. All rights reserved.
7 * Copyright (c) 2003 Hewlett-Packard Co 7 * Copyright (c) 2003 Hewlett-Packard Co
8 * Bjorn Helgaas <bjorn.helgaas@hp.com> 8 * Bjorn Helgaas <bjorn.helgaas@hp.com>
9 * 9 *
@@ -27,9 +27,17 @@
27 * mca.c may not pass a buffer, a NULL buffer just indicates that a new 27 * mca.c may not pass a buffer, a NULL buffer just indicates that a new
28 * record is available in SAL. 28 * record is available in SAL.
29 * Replace some NR_CPUS by cpus_online, for hotplug cpu. 29 * Replace some NR_CPUS by cpus_online, for hotplug cpu.
30 *
31 * Jan 5 2006 kaos@sgi.com
32 * Handle hotplug cpus coming online.
33 * Handle hotplug cpus going offline while they still have outstanding records.
34 * Use the cpu_* macros consistently.
35 * Replace the counting semaphore with a mutex and a test if the cpumask is non-empty.
36 * Modify the locking to make the test for "work to do" an atomic operation.
30 */ 37 */
31 38
32#include <linux/capability.h> 39#include <linux/capability.h>
40#include <linux/cpu.h>
33#include <linux/types.h> 41#include <linux/types.h>
34#include <linux/proc_fs.h> 42#include <linux/proc_fs.h>
35#include <linux/module.h> 43#include <linux/module.h>
@@ -132,8 +140,8 @@ enum salinfo_state {
132}; 140};
133 141
134struct salinfo_data { 142struct salinfo_data {
135 volatile cpumask_t cpu_event; /* which cpus have outstanding events */ 143 cpumask_t cpu_event; /* which cpus have outstanding events */
136 struct semaphore sem; /* count of cpus with outstanding events (bits set in cpu_event) */ 144 struct semaphore mutex;
137 u8 *log_buffer; 145 u8 *log_buffer;
138 u64 log_size; 146 u64 log_size;
139 u8 *oemdata; /* decoded oem data */ 147 u8 *oemdata; /* decoded oem data */
@@ -174,6 +182,21 @@ struct salinfo_platform_oemdata_parms {
174 int ret; 182 int ret;
175}; 183};
176 184
185/* Kick the mutex that tells user space that there is work to do. Instead of
186 * trying to track the state of the mutex across multiple cpus, in user
187 * context, interrupt context, non-maskable interrupt context and hotplug cpu,
188 * it is far easier just to grab the mutex if it is free then release it.
189 *
190 * This routine must be called with data_saved_lock held, to make the down/up
191 * operation atomic.
192 */
193static void
194salinfo_work_to_do(struct salinfo_data *data)
195{
196 down_trylock(&data->mutex);
197 up(&data->mutex);
198}
199
177static void 200static void
178salinfo_platform_oemdata_cpu(void *context) 201salinfo_platform_oemdata_cpu(void *context)
179{ 202{
@@ -212,9 +235,9 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
212 235
213 BUG_ON(type >= ARRAY_SIZE(salinfo_log_name)); 236 BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
214 237
238 if (irqsafe)
239 spin_lock_irqsave(&data_saved_lock, flags);
215 if (buffer) { 240 if (buffer) {
216 if (irqsafe)
217 spin_lock_irqsave(&data_saved_lock, flags);
218 for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { 241 for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
219 if (!data_saved->buffer) 242 if (!data_saved->buffer)
220 break; 243 break;
@@ -232,13 +255,11 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
232 data_saved->size = size; 255 data_saved->size = size;
233 data_saved->buffer = buffer; 256 data_saved->buffer = buffer;
234 } 257 }
235 if (irqsafe)
236 spin_unlock_irqrestore(&data_saved_lock, flags);
237 } 258 }
238 259 cpu_set(smp_processor_id(), data->cpu_event);
239 if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) { 260 if (irqsafe) {
240 if (irqsafe) 261 salinfo_work_to_do(data);
241 up(&data->sem); 262 spin_unlock_irqrestore(&data_saved_lock, flags);
242 } 263 }
243} 264}
244 265
@@ -249,20 +270,17 @@ static struct timer_list salinfo_timer;
249static void 270static void
250salinfo_timeout_check(struct salinfo_data *data) 271salinfo_timeout_check(struct salinfo_data *data)
251{ 272{
252 int i; 273 unsigned long flags;
253 if (!data->open) 274 if (!data->open)
254 return; 275 return;
255 for_each_online_cpu(i) { 276 if (!cpus_empty(data->cpu_event)) {
256 if (test_bit(i, &data->cpu_event)) { 277 spin_lock_irqsave(&data_saved_lock, flags);
257 /* double up() is not a problem, user space will see no 278 salinfo_work_to_do(data);
258 * records for the additional "events". 279 spin_unlock_irqrestore(&data_saved_lock, flags);
259 */
260 up(&data->sem);
261 }
262 } 280 }
263} 281}
264 282
265static void 283static void
266salinfo_timeout (unsigned long arg) 284salinfo_timeout (unsigned long arg)
267{ 285{
268 salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); 286 salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
@@ -290,16 +308,20 @@ salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t
290 int i, n, cpu = -1; 308 int i, n, cpu = -1;
291 309
292retry: 310retry:
293 if (down_trylock(&data->sem)) { 311 if (cpus_empty(data->cpu_event) && down_trylock(&data->mutex)) {
294 if (file->f_flags & O_NONBLOCK) 312 if (file->f_flags & O_NONBLOCK)
295 return -EAGAIN; 313 return -EAGAIN;
296 if (down_interruptible(&data->sem)) 314 if (down_interruptible(&data->mutex))
297 return -EINTR; 315 return -EINTR;
298 } 316 }
299 317
300 n = data->cpu_check; 318 n = data->cpu_check;
301 for (i = 0; i < NR_CPUS; i++) { 319 for (i = 0; i < NR_CPUS; i++) {
302 if (test_bit(n, &data->cpu_event) && cpu_online(n)) { 320 if (cpu_isset(n, data->cpu_event)) {
321 if (!cpu_online(n)) {
322 cpu_clear(n, data->cpu_event);
323 continue;
324 }
303 cpu = n; 325 cpu = n;
304 break; 326 break;
305 } 327 }
@@ -310,9 +332,6 @@ retry:
310 if (cpu == -1) 332 if (cpu == -1)
311 goto retry; 333 goto retry;
312 334
313 /* events are sticky until the user says "clear" */
314 up(&data->sem);
315
316 /* for next read, start checking at next CPU */ 335 /* for next read, start checking at next CPU */
317 data->cpu_check = cpu; 336 data->cpu_check = cpu;
318 if (++data->cpu_check == NR_CPUS) 337 if (++data->cpu_check == NR_CPUS)
@@ -381,10 +400,8 @@ salinfo_log_release(struct inode *inode, struct file *file)
381static void 400static void
382call_on_cpu(int cpu, void (*fn)(void *), void *arg) 401call_on_cpu(int cpu, void (*fn)(void *), void *arg)
383{ 402{
384 cpumask_t save_cpus_allowed, new_cpus_allowed; 403 cpumask_t save_cpus_allowed = current->cpus_allowed;
385 memcpy(&save_cpus_allowed, &current->cpus_allowed, sizeof(save_cpus_allowed)); 404 cpumask_t new_cpus_allowed = cpumask_of_cpu(cpu);
386 memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed));
387 set_bit(cpu, &new_cpus_allowed);
388 set_cpus_allowed(current, new_cpus_allowed); 405 set_cpus_allowed(current, new_cpus_allowed);
389 (*fn)(arg); 406 (*fn)(arg);
390 set_cpus_allowed(current, save_cpus_allowed); 407 set_cpus_allowed(current, save_cpus_allowed);
@@ -433,10 +450,10 @@ retry:
433 if (!data->saved_num) 450 if (!data->saved_num)
434 call_on_cpu(cpu, salinfo_log_read_cpu, data); 451 call_on_cpu(cpu, salinfo_log_read_cpu, data);
435 if (!data->log_size) { 452 if (!data->log_size) {
436 data->state = STATE_NO_DATA; 453 data->state = STATE_NO_DATA;
437 clear_bit(cpu, &data->cpu_event); 454 cpu_clear(cpu, data->cpu_event);
438 } else { 455 } else {
439 data->state = STATE_LOG_RECORD; 456 data->state = STATE_LOG_RECORD;
440 } 457 }
441} 458}
442 459
@@ -473,27 +490,31 @@ static int
473salinfo_log_clear(struct salinfo_data *data, int cpu) 490salinfo_log_clear(struct salinfo_data *data, int cpu)
474{ 491{
475 sal_log_record_header_t *rh; 492 sal_log_record_header_t *rh;
493 unsigned long flags;
494 spin_lock_irqsave(&data_saved_lock, flags);
476 data->state = STATE_NO_DATA; 495 data->state = STATE_NO_DATA;
477 if (!test_bit(cpu, &data->cpu_event)) 496 if (!cpu_isset(cpu, data->cpu_event)) {
497 spin_unlock_irqrestore(&data_saved_lock, flags);
478 return 0; 498 return 0;
479 down(&data->sem); 499 }
480 clear_bit(cpu, &data->cpu_event); 500 cpu_clear(cpu, data->cpu_event);
481 if (data->saved_num) { 501 if (data->saved_num) {
482 unsigned long flags; 502 shift1_data_saved(data, data->saved_num - 1);
483 spin_lock_irqsave(&data_saved_lock, flags);
484 shift1_data_saved(data, data->saved_num - 1 );
485 data->saved_num = 0; 503 data->saved_num = 0;
486 spin_unlock_irqrestore(&data_saved_lock, flags);
487 } 504 }
505 spin_unlock_irqrestore(&data_saved_lock, flags);
488 rh = (sal_log_record_header_t *)(data->log_buffer); 506 rh = (sal_log_record_header_t *)(data->log_buffer);
489 /* Corrected errors have already been cleared from SAL */ 507 /* Corrected errors have already been cleared from SAL */
490 if (rh->severity != sal_log_severity_corrected) 508 if (rh->severity != sal_log_severity_corrected)
491 call_on_cpu(cpu, salinfo_log_clear_cpu, data); 509 call_on_cpu(cpu, salinfo_log_clear_cpu, data);
492 /* clearing a record may make a new record visible */ 510 /* clearing a record may make a new record visible */
493 salinfo_log_new_read(cpu, data); 511 salinfo_log_new_read(cpu, data);
494 if (data->state == STATE_LOG_RECORD && 512 if (data->state == STATE_LOG_RECORD) {
495 !test_and_set_bit(cpu, &data->cpu_event)) 513 spin_lock_irqsave(&data_saved_lock, flags);
496 up(&data->sem); 514 cpu_set(cpu, data->cpu_event);
515 salinfo_work_to_do(data);
516 spin_unlock_irqrestore(&data_saved_lock, flags);
517 }
497 return 0; 518 return 0;
498} 519}
499 520
@@ -550,6 +571,53 @@ static struct file_operations salinfo_data_fops = {
550 .write = salinfo_log_write, 571 .write = salinfo_log_write,
551}; 572};
552 573
574#ifdef CONFIG_HOTPLUG_CPU
575static int __devinit
576salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
577{
578 unsigned int i, cpu = (unsigned long)hcpu;
579 unsigned long flags;
580 struct salinfo_data *data;
581 switch (action) {
582 case CPU_ONLINE:
583 spin_lock_irqsave(&data_saved_lock, flags);
584 for (i = 0, data = salinfo_data;
585 i < ARRAY_SIZE(salinfo_data);
586 ++i, ++data) {
587 cpu_set(cpu, data->cpu_event);
588 salinfo_work_to_do(data);
589 }
590 spin_unlock_irqrestore(&data_saved_lock, flags);
591 break;
592 case CPU_DEAD:
593 spin_lock_irqsave(&data_saved_lock, flags);
594 for (i = 0, data = salinfo_data;
595 i < ARRAY_SIZE(salinfo_data);
596 ++i, ++data) {
597 struct salinfo_data_saved *data_saved;
598 int j;
599 for (j = ARRAY_SIZE(data->data_saved) - 1, data_saved = data->data_saved + j;
600 j >= 0;
601 --j, --data_saved) {
602 if (data_saved->buffer && data_saved->cpu == cpu) {
603 shift1_data_saved(data, j);
604 }
605 }
606 cpu_clear(cpu, data->cpu_event);
607 }
608 spin_unlock_irqrestore(&data_saved_lock, flags);
609 break;
610 }
611 return NOTIFY_OK;
612}
613
614static struct notifier_block salinfo_cpu_notifier =
615{
616 .notifier_call = salinfo_cpu_callback,
617 .priority = 0,
618};
619#endif /* CONFIG_HOTPLUG_CPU */
620
553static int __init 621static int __init
554salinfo_init(void) 622salinfo_init(void)
555{ 623{
@@ -557,7 +625,7 @@ salinfo_init(void)
557 struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */ 625 struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
558 struct proc_dir_entry *dir, *entry; 626 struct proc_dir_entry *dir, *entry;
559 struct salinfo_data *data; 627 struct salinfo_data *data;
560 int i, j, online; 628 int i, j;
561 629
562 salinfo_dir = proc_mkdir("sal", NULL); 630 salinfo_dir = proc_mkdir("sal", NULL);
563 if (!salinfo_dir) 631 if (!salinfo_dir)
@@ -572,7 +640,7 @@ salinfo_init(void)
572 for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { 640 for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
573 data = salinfo_data + i; 641 data = salinfo_data + i;
574 data->type = i; 642 data->type = i;
575 sema_init(&data->sem, 0); 643 init_MUTEX(&data->mutex);
576 dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); 644 dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
577 if (!dir) 645 if (!dir)
578 continue; 646 continue;
@@ -592,12 +660,8 @@ salinfo_init(void)
592 *sdir++ = entry; 660 *sdir++ = entry;
593 661
594 /* we missed any events before now */ 662 /* we missed any events before now */
595 online = 0; 663 for_each_online_cpu(j)
596 for_each_online_cpu(j) { 664 cpu_set(j, data->cpu_event);
597 set_bit(j, &data->cpu_event);
598 ++online;
599 }
600 sema_init(&data->sem, online);
601 665
602 *sdir++ = dir; 666 *sdir++ = dir;
603 } 667 }
@@ -609,6 +673,10 @@ salinfo_init(void)
609 salinfo_timer.function = &salinfo_timeout; 673 salinfo_timer.function = &salinfo_timeout;
610 add_timer(&salinfo_timer); 674 add_timer(&salinfo_timer);
611 675
676#ifdef CONFIG_HOTPLUG_CPU
677 register_cpu_notifier(&salinfo_cpu_notifier);
678#endif
679
612 return 0; 680 return 0;
613} 681}
614 682
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
index d3e0ecb56d62..55391901b013 100644
--- a/arch/ia64/kernel/traps.c
+++ b/arch/ia64/kernel/traps.c
@@ -530,12 +530,15 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
530 if (fsys_mode(current, &regs)) { 530 if (fsys_mode(current, &regs)) {
531 extern char __kernel_syscall_via_break[]; 531 extern char __kernel_syscall_via_break[];
532 /* 532 /*
533 * Got a trap in fsys-mode: Taken Branch Trap and Single Step trap 533 * Got a trap in fsys-mode: Taken Branch Trap
534 * need special handling; Debug trap is not supposed to happen. 534 * and Single Step trap need special handling;
535 * Debug trap is ignored (we disable it here
536 * and re-enable it in the lower-privilege trap).
535 */ 537 */
536 if (unlikely(vector == 29)) { 538 if (unlikely(vector == 29)) {
537 die("Got debug trap in fsys-mode---not supposed to happen!", 539 set_thread_flag(TIF_DB_DISABLED);
538 &regs, 0); 540 ia64_psr(&regs)->db = 0;
541 ia64_psr(&regs)->lp = 1;
539 return; 542 return;
540 } 543 }
541 /* re-do the system call via break 0x100000: */ 544 /* re-do the system call via break 0x100000: */
@@ -589,10 +592,19 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
589 case 34: 592 case 34:
590 if (isr & 0x2) { 593 if (isr & 0x2) {
591 /* Lower-Privilege Transfer Trap */ 594 /* Lower-Privilege Transfer Trap */
595
596 /* If we disabled debug traps during an fsyscall,
597 * re-enable them here.
598 */
599 if (test_thread_flag(TIF_DB_DISABLED)) {
600 clear_thread_flag(TIF_DB_DISABLED);
601 ia64_psr(&regs)->db = 1;
602 }
603
592 /* 604 /*
593 * Just clear PSR.lp and then return immediately: all the 605 * Just clear PSR.lp and then return immediately:
594 * interesting work (e.g., signal delivery is done in the kernel 606 * all the interesting work (e.g., signal delivery)
595 * exit path). 607 * is done in the kernel exit path.
596 */ 608 */
597 ia64_psr(&regs)->lp = 0; 609 ia64_psr(&regs)->lp = 0;
598 return; 610 return;
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 41105d454423..6a4eec9113e8 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -90,7 +90,7 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
90{ 90{
91 static DEFINE_SPINLOCK(ptcg_lock); 91 static DEFINE_SPINLOCK(ptcg_lock);
92 92
93 if (mm != current->active_mm) { 93 if (mm != current->active_mm || !current->mm) {
94 flush_tlb_all(); 94 flush_tlb_all();
95 return; 95 return;
96 } 96 }
diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h
index 71c2b271b4c6..4d417c301201 100644
--- a/arch/ia64/sn/include/xtalk/hubdev.h
+++ b/arch/ia64/sn/include/xtalk/hubdev.h
@@ -26,11 +26,14 @@
26#define IIO_NUM_ITTES 7 26#define IIO_NUM_ITTES 7
27#define HUB_NUM_BIG_WINDOW (IIO_NUM_ITTES - 1) 27#define HUB_NUM_BIG_WINDOW (IIO_NUM_ITTES - 1)
28 28
29struct sn_flush_device_list { 29/* This struct is shared between the PROM and the kernel.
30 * Changes to this struct will require corresponding changes to the kernel.
31 */
32struct sn_flush_device_common {
30 int sfdl_bus; 33 int sfdl_bus;
31 int sfdl_slot; 34 int sfdl_slot;
32 int sfdl_pin; 35 int sfdl_pin;
33 struct bar_list { 36 struct common_bar_list {
34 unsigned long start; 37 unsigned long start;
35 unsigned long end; 38 unsigned long end;
36 } sfdl_bar_list[6]; 39 } sfdl_bar_list[6];
@@ -40,14 +43,19 @@ struct sn_flush_device_list {
40 uint32_t sfdl_persistent_busnum; 43 uint32_t sfdl_persistent_busnum;
41 uint32_t sfdl_persistent_segment; 44 uint32_t sfdl_persistent_segment;
42 struct pcibus_info *sfdl_pcibus_info; 45 struct pcibus_info *sfdl_pcibus_info;
46};
47
48/* This struct is kernel only and is not used by the PROM */
49struct sn_flush_device_kernel {
43 spinlock_t sfdl_flush_lock; 50 spinlock_t sfdl_flush_lock;
51 struct sn_flush_device_common *common;
44}; 52};
45 53
46/* 54/*
47 * **widget_p - Used as an array[wid_num][device] of sn_flush_device_list. 55 * **widget_p - Used as an array[wid_num][device] of sn_flush_device_kernel.
48 */ 56 */
49struct sn_flush_nasid_entry { 57struct sn_flush_nasid_entry {
50 struct sn_flush_device_list **widget_p; /* Used as a array of wid_num */ 58 struct sn_flush_device_kernel **widget_p; // Used as an array of wid_num
51 uint64_t iio_itte[8]; 59 uint64_t iio_itte[8];
52}; 60};
53 61
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
index fcbc748ae433..f1ec1370b3e3 100644
--- a/arch/ia64/sn/kernel/bte_error.c
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -33,7 +33,7 @@ void bte_error_handler(unsigned long);
33 * Wait until all BTE related CRBs are completed 33 * Wait until all BTE related CRBs are completed
34 * and then reset the interfaces. 34 * and then reset the interfaces.
35 */ 35 */
36void shub1_bte_error_handler(unsigned long _nodepda) 36int shub1_bte_error_handler(unsigned long _nodepda)
37{ 37{
38 struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; 38 struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
39 struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; 39 struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
@@ -53,7 +53,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
53 (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { 53 (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
54 BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, 54 BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
55 smp_processor_id())); 55 smp_processor_id()));
56 return; 56 return 1;
57 } 57 }
58 58
59 /* Determine information about our hub */ 59 /* Determine information about our hub */
@@ -81,7 +81,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
81 mod_timer(recovery_timer, HZ * 5); 81 mod_timer(recovery_timer, HZ * 5);
82 BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, 82 BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
83 smp_processor_id())); 83 smp_processor_id()));
84 return; 84 return 1;
85 } 85 }
86 if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { 86 if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
87 87
@@ -99,7 +99,7 @@ void shub1_bte_error_handler(unsigned long _nodepda)
99 BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", 99 BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
100 err_nodepda, smp_processor_id(), 100 err_nodepda, smp_processor_id(),
101 i)); 101 i));
102 return; 102 return 1;
103 } 103 }
104 } 104 }
105 } 105 }
@@ -124,6 +124,42 @@ void shub1_bte_error_handler(unsigned long _nodepda)
124 REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); 124 REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
125 125
126 del_timer(recovery_timer); 126 del_timer(recovery_timer);
127 return 0;
128}
129
130/*
131 * Wait until all BTE related CRBs are completed
132 * and then reset the interfaces.
133 */
134int shub2_bte_error_handler(unsigned long _nodepda)
135{
136 struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
137 struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
138 struct bteinfo_s *bte;
139 nasid_t nasid;
140 u64 status;
141 int i;
142
143 nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
144
145 /*
146 * Verify that all the BTEs are complete
147 */
148 for (i = 0; i < BTES_PER_NODE; i++) {
149 bte = &err_nodepda->bte_if[i];
150 status = BTE_LNSTAT_LOAD(bte);
151 if ((status & IBLS_ERROR) || !(status & IBLS_BUSY))
152 continue;
153 mod_timer(recovery_timer, HZ * 5);
154 BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
155 smp_processor_id()));
156 return 1;
157 }
158 if (ia64_sn_bte_recovery(nasid))
159 panic("bte_error_handler(): Fatal BTE Error");
160
161 del_timer(recovery_timer);
162 return 0;
127} 163}
128 164
129/* 165/*
@@ -135,7 +171,6 @@ void bte_error_handler(unsigned long _nodepda)
135 struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; 171 struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
136 spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; 172 spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
137 int i; 173 int i;
138 nasid_t nasid;
139 unsigned long irq_flags; 174 unsigned long irq_flags;
140 volatile u64 *notify; 175 volatile u64 *notify;
141 bte_result_t bh_error; 176 bte_result_t bh_error;
@@ -160,12 +195,15 @@ void bte_error_handler(unsigned long _nodepda)
160 } 195 }
161 196
162 if (is_shub1()) { 197 if (is_shub1()) {
163 shub1_bte_error_handler(_nodepda); 198 if (shub1_bte_error_handler(_nodepda)) {
199 spin_unlock_irqrestore(recovery_lock, irq_flags);
200 return;
201 }
164 } else { 202 } else {
165 nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); 203 if (shub2_bte_error_handler(_nodepda)) {
166 204 spin_unlock_irqrestore(recovery_lock, irq_flags);
167 if (ia64_sn_bte_recovery(nasid)) 205 return;
168 panic("bte_error_handler(): Fatal BTE Error"); 206 }
169 } 207 }
170 208
171 for (i = 0; i < BTES_PER_NODE; i++) { 209 for (i = 0; i < BTES_PER_NODE; i++) {
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
index 5c5eb01c50f0..56ab6bae00ee 100644
--- a/arch/ia64/sn/kernel/huberror.c
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -32,13 +32,14 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
32 ret_stuff.v0 = 0; 32 ret_stuff.v0 = 0;
33 hubdev_info = (struct hubdev_info *)arg; 33 hubdev_info = (struct hubdev_info *)arg;
34 nasid = hubdev_info->hdi_nasid; 34 nasid = hubdev_info->hdi_nasid;
35 SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, 35
36 if (is_shub1()) {
37 SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
36 (u64) nasid, 0, 0, 0, 0, 0, 0); 38 (u64) nasid, 0, 0, 0, 0, 0, 0);
37 39
38 if ((int)ret_stuff.v0) 40 if ((int)ret_stuff.v0)
39 panic("hubii_eint_handler(): Fatal TIO Error"); 41 panic("hubii_eint_handler(): Fatal TIO Error");
40 42
41 if (is_shub1()) {
42 if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ 43 if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
43 (void)hubiio_crb_error_handler(hubdev_info); 44 (void)hubiio_crb_error_handler(hubdev_info);
44 } else 45 } else
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
index 318087e35b66..258d9d7aff98 100644
--- a/arch/ia64/sn/kernel/io_init.c
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -76,11 +76,12 @@ static struct sn_pcibus_provider sn_pci_default_provider = {
76}; 76};
77 77
78/* 78/*
79 * Retrieve the DMA Flush List given nasid. This list is needed 79 * Retrieve the DMA Flush List given nasid, widget, and device.
80 * to implement the WAR - Flush DMA data on PIO Reads. 80 * This list is needed to implement the WAR - Flush DMA data on PIO Reads.
81 */ 81 */
82static inline uint64_t 82static inline u64
83sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address) 83sal_get_device_dmaflush_list(u64 nasid, u64 widget_num, u64 device_num,
84 u64 address)
84{ 85{
85 86
86 struct ia64_sal_retval ret_stuff; 87 struct ia64_sal_retval ret_stuff;
@@ -88,17 +89,17 @@ sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
88 ret_stuff.v0 = 0; 89 ret_stuff.v0 = 0;
89 90
90 SAL_CALL_NOLOCK(ret_stuff, 91 SAL_CALL_NOLOCK(ret_stuff,
91 (u64) SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST, 92 (u64) SN_SAL_IOIF_GET_DEVICE_DMAFLUSH_LIST,
92 (u64) nasid, (u64) widget_num, (u64) address, 0, 0, 0, 93 (u64) nasid, (u64) widget_num,
93 0); 94 (u64) device_num, (u64) address, 0, 0, 0);
94 return ret_stuff.v0; 95 return ret_stuff.status;
95 96
96} 97}
97 98
98/* 99/*
99 * Retrieve the hub device info structure for the given nasid. 100 * Retrieve the hub device info structure for the given nasid.
100 */ 101 */
101static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address) 102static inline u64 sal_get_hubdev_info(u64 handle, u64 address)
102{ 103{
103 104
104 struct ia64_sal_retval ret_stuff; 105 struct ia64_sal_retval ret_stuff;
@@ -114,7 +115,7 @@ static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
114/* 115/*
115 * Retrieve the pci bus information given the bus number. 116 * Retrieve the pci bus information given the bus number.
116 */ 117 */
117static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address) 118static inline u64 sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
118{ 119{
119 120
120 struct ia64_sal_retval ret_stuff; 121 struct ia64_sal_retval ret_stuff;
@@ -130,7 +131,7 @@ static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
130/* 131/*
131 * Retrieve the pci device information given the bus and device|function number. 132 * Retrieve the pci device information given the bus and device|function number.
132 */ 133 */
133static inline uint64_t 134static inline u64
134sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev, 135sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
135 u64 sn_irq_info) 136 u64 sn_irq_info)
136{ 137{
@@ -170,12 +171,12 @@ sn_pcidev_info_get(struct pci_dev *dev)
170 */ 171 */
171static void sn_fixup_ionodes(void) 172static void sn_fixup_ionodes(void)
172{ 173{
173 174 struct sn_flush_device_kernel *sn_flush_device_kernel;
174 struct sn_flush_device_list *sn_flush_device_list; 175 struct sn_flush_device_kernel *dev_entry;
175 struct hubdev_info *hubdev; 176 struct hubdev_info *hubdev;
176 uint64_t status; 177 u64 status;
177 uint64_t nasid; 178 u64 nasid;
178 int i, widget; 179 int i, widget, device;
179 180
180 /* 181 /*
181 * Get SGI Specific HUB chipset information. 182 * Get SGI Specific HUB chipset information.
@@ -186,7 +187,7 @@ static void sn_fixup_ionodes(void)
186 nasid = cnodeid_to_nasid(i); 187 nasid = cnodeid_to_nasid(i);
187 hubdev->max_segment_number = 0xffffffff; 188 hubdev->max_segment_number = 0xffffffff;
188 hubdev->max_pcibus_number = 0xff; 189 hubdev->max_pcibus_number = 0xff;
189 status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev)); 190 status = sal_get_hubdev_info(nasid, (u64) __pa(hubdev));
190 if (status) 191 if (status)
191 continue; 192 continue;
192 193
@@ -213,38 +214,49 @@ static void sn_fixup_ionodes(void)
213 214
214 hubdev->hdi_flush_nasid_list.widget_p = 215 hubdev->hdi_flush_nasid_list.widget_p =
215 kmalloc((HUB_WIDGET_ID_MAX + 1) * 216 kmalloc((HUB_WIDGET_ID_MAX + 1) *
216 sizeof(struct sn_flush_device_list *), GFP_KERNEL); 217 sizeof(struct sn_flush_device_kernel *),
217 218 GFP_KERNEL);
218 memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0, 219 memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0,
219 (HUB_WIDGET_ID_MAX + 1) * 220 (HUB_WIDGET_ID_MAX + 1) *
220 sizeof(struct sn_flush_device_list *)); 221 sizeof(struct sn_flush_device_kernel *));
221 222
222 for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) { 223 for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
223 sn_flush_device_list = kmalloc(DEV_PER_WIDGET * 224 sn_flush_device_kernel = kmalloc(DEV_PER_WIDGET *
224 sizeof(struct 225 sizeof(struct
225 sn_flush_device_list), 226 sn_flush_device_kernel),
226 GFP_KERNEL); 227 GFP_KERNEL);
227 memset(sn_flush_device_list, 0x0, 228 if (!sn_flush_device_kernel)
229 BUG();
230 memset(sn_flush_device_kernel, 0x0,
228 DEV_PER_WIDGET * 231 DEV_PER_WIDGET *
229 sizeof(struct sn_flush_device_list)); 232 sizeof(struct sn_flush_device_kernel));
230 233
231 status = 234 dev_entry = sn_flush_device_kernel;
232 sal_get_widget_dmaflush_list(nasid, widget, 235 for (device = 0; device < DEV_PER_WIDGET;
233 (uint64_t) 236 device++,dev_entry++) {
234 __pa 237 dev_entry->common = kmalloc(sizeof(struct
235 (sn_flush_device_list)); 238 sn_flush_device_common),
236 if (status) { 239 GFP_KERNEL);
237 kfree(sn_flush_device_list); 240 if (!dev_entry->common)
238 continue; 241 BUG();
242 memset(dev_entry->common, 0x0, sizeof(struct
243 sn_flush_device_common));
244
245 status = sal_get_device_dmaflush_list(nasid,
246 widget,
247 device,
248 (u64)(dev_entry->common));
249 if (status)
250 BUG();
251
252 spin_lock_init(&dev_entry->sfdl_flush_lock);
239 } 253 }
240 254
241 spin_lock_init(&sn_flush_device_list->sfdl_flush_lock); 255 if (sn_flush_device_kernel)
242 hubdev->hdi_flush_nasid_list.widget_p[widget] = 256 hubdev->hdi_flush_nasid_list.widget_p[widget] =
243 sn_flush_device_list; 257 sn_flush_device_kernel;
244 } 258 }
245
246 } 259 }
247
248} 260}
249 261
250/* 262/*
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
deleted file mode 100644
index 5483a9f227d4..000000000000
--- a/arch/ia64/sn/kernel/xpc.h
+++ /dev/null
@@ -1,1273 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * Cross Partition Communication (XPC) structures and macros.
12 */
13
14#ifndef _IA64_SN_KERNEL_XPC_H
15#define _IA64_SN_KERNEL_XPC_H
16
17
18#include <linux/config.h>
19#include <linux/interrupt.h>
20#include <linux/sysctl.h>
21#include <linux/device.h>
22#include <asm/pgtable.h>
23#include <asm/processor.h>
24#include <asm/sn/bte.h>
25#include <asm/sn/clksupport.h>
26#include <asm/sn/addrs.h>
27#include <asm/sn/mspec.h>
28#include <asm/sn/shub_mmr.h>
29#include <asm/sn/xp.h>
30
31
32/*
33 * XPC Version numbers consist of a major and minor number. XPC can always
34 * talk to versions with same major #, and never talk to versions with a
35 * different major #.
36 */
37#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf))
38#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
39#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
40
41
42/*
43 * The next macros define word or bit representations for given
44 * C-brick nasid in either the SAL provided bit array representing
45 * nasids in the partition/machine or the AMO_t array used for
46 * inter-partition initiation communications.
47 *
48 * For SN2 machines, C-Bricks are alway even numbered NASIDs. As
49 * such, some space will be saved by insisting that nasid information
50 * passed from SAL always be packed for C-Bricks and the
51 * cross-partition interrupts use the same packing scheme.
52 */
53#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
54#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
55#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
56 (1UL << XPC_NASID_B_INDEX(_n)))
57#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
58
59#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
60#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
61
62/* define the process name of HB checker and the CPU it is pinned to */
63#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
64#define XPC_HB_CHECK_CPU 0
65
66/* define the process name of the discovery thread */
67#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
68
69
70/*
71 * the reserved page
72 *
73 * SAL reserves one page of memory per partition for XPC. Though a full page
74 * in length (16384 bytes), its starting address is not page aligned, but it
75 * is cacheline aligned. The reserved page consists of the following:
76 *
77 * reserved page header
78 *
79 * The first cacheline of the reserved page contains the header
80 * (struct xpc_rsvd_page). Before SAL initialization has completed,
81 * SAL has set up the following fields of the reserved page header:
82 * SAL_signature, SAL_version, partid, and nasids_size. The other
83 * fields are set up by XPC. (xpc_rsvd_page points to the local
84 * partition's reserved page.)
85 *
86 * part_nasids mask
87 * mach_nasids mask
88 *
89 * SAL also sets up two bitmaps (or masks), one that reflects the actual
90 * nasids in this partition (part_nasids), and the other that reflects
91 * the actual nasids in the entire machine (mach_nasids). We're only
92 * interested in the even numbered nasids (which contain the processors
93 * and/or memory), so we only need half as many bits to represent the
94 * nasids. The part_nasids mask is located starting at the first cacheline
95 * following the reserved page header. The mach_nasids mask follows right
96 * after the part_nasids mask. The size in bytes of each mask is reflected
97 * by the reserved page header field 'nasids_size'. (Local partition's
98 * mask pointers are xpc_part_nasids and xpc_mach_nasids.)
99 *
100 * vars
101 * vars part
102 *
103 * Immediately following the mach_nasids mask are the XPC variables
104 * required by other partitions. First are those that are generic to all
105 * partitions (vars), followed on the next available cacheline by those
106 * which are partition specific (vars part). These are setup by XPC.
107 * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
108 *
109 * Note: Until vars_pa is set, the partition XPC code has not been initialized.
110 */
111struct xpc_rsvd_page {
112 u64 SAL_signature; /* SAL: unique signature */
113 u64 SAL_version; /* SAL: version */
114 u8 partid; /* SAL: partition ID */
115 u8 version;
116 u8 pad1[6]; /* align to next u64 in cacheline */
117 volatile u64 vars_pa;
118 struct timespec stamp; /* time when reserved page was setup by XPC */
119 u64 pad2[9]; /* align to last u64 in cacheline */
120 u64 nasids_size; /* SAL: size of each nasid mask in bytes */
121};
122
123#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
124
125#define XPC_SUPPORTS_RP_STAMP(_version) \
126 (_version >= _XPC_VERSION(1,1))
127
128/*
129 * compare stamps - the return value is:
130 *
131 * < 0, if stamp1 < stamp2
132 * = 0, if stamp1 == stamp2
133 * > 0, if stamp1 > stamp2
134 */
135static inline int
136xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
137{
138 int ret;
139
140
141 if ((ret = stamp1->tv_sec - stamp2->tv_sec) == 0) {
142 ret = stamp1->tv_nsec - stamp2->tv_nsec;
143 }
144 return ret;
145}
146
147
148/*
149 * Define the structures by which XPC variables can be exported to other
150 * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
151 */
152
153/*
154 * The following structure describes the partition generic variables
155 * needed by other partitions in order to properly initialize.
156 *
157 * struct xpc_vars version number also applies to struct xpc_vars_part.
158 * Changes to either structure and/or related functionality should be
159 * reflected by incrementing either the major or minor version numbers
160 * of struct xpc_vars.
161 */
162struct xpc_vars {
163 u8 version;
164 u64 heartbeat;
165 u64 heartbeating_to_mask;
166 u64 heartbeat_offline; /* if 0, heartbeat should be changing */
167 int act_nasid;
168 int act_phys_cpuid;
169 u64 vars_part_pa;
170 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
171 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
172};
173
174#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
175
176#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
177 (_version >= _XPC_VERSION(3,1))
178
179
180static inline int
181xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
182{
183 return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
184}
185
186static inline void
187xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
188{
189 u64 old_mask, new_mask;
190
191 do {
192 old_mask = vars->heartbeating_to_mask;
193 new_mask = (old_mask | (1UL << partid));
194 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
195 old_mask);
196}
197
198static inline void
199xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
200{
201 u64 old_mask, new_mask;
202
203 do {
204 old_mask = vars->heartbeating_to_mask;
205 new_mask = (old_mask & ~(1UL << partid));
206 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
207 old_mask);
208}
209
210
211/*
212 * The AMOs page consists of a number of AMO variables which are divided into
213 * four groups, The first two groups are used to identify an IRQ's sender.
214 * These two groups consist of 64 and 128 AMO variables respectively. The last
215 * two groups, consisting of just one AMO variable each, are used to identify
216 * the remote partitions that are currently engaged (from the viewpoint of
217 * the XPC running on the remote partition).
218 */
219#define XPC_NOTIFY_IRQ_AMOS 0
220#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
221#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
222#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
223
224
225/*
226 * The following structure describes the per partition specific variables.
227 *
228 * An array of these structures, one per partition, will be defined. As a
229 * partition becomes active XPC will copy the array entry corresponding to
230 * itself from that partition. It is desirable that the size of this
231 * structure evenly divide into a cacheline, such that none of the entries
232 * in this array crosses a cacheline boundary. As it is now, each entry
233 * occupies half a cacheline.
234 */
235struct xpc_vars_part {
236 volatile u64 magic;
237
238 u64 openclose_args_pa; /* physical address of open and close args */
239 u64 GPs_pa; /* physical address of Get/Put values */
240
241 u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */
242 int IPI_nasid; /* nasid of where to send IPIs */
243 int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
244
245 u8 nchannels; /* #of defined channels supported */
246
247 u8 reserved[23]; /* pad to a full 64 bytes */
248};
249
250/*
251 * The vars_part MAGIC numbers play a part in the first contact protocol.
252 *
253 * MAGIC1 indicates that the per partition specific variables for a remote
254 * partition have been initialized by this partition.
255 *
256 * MAGIC2 indicates that this partition has pulled the remote partititions
257 * per partition variables that pertain to this partition.
258 */
259#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
260#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
261
262
263/* the reserved page sizes and offsets */
264
265#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
266#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars))
267
268#define XPC_RP_PART_NASIDS(_rp) (u64 *) ((u8 *) _rp + XPC_RP_HEADER_SIZE)
269#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
270#define XPC_RP_VARS(_rp) ((struct xpc_vars *) XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words)
271#define XPC_RP_VARS_PART(_rp) (struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(rp) + XPC_RP_VARS_SIZE)
272
273
274/*
275 * Functions registered by add_timer() or called by kernel_thread() only
276 * allow for a single 64-bit argument. The following macros can be used to
277 * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
278 * the passed argument.
279 */
280#define XPC_PACK_ARGS(_arg1, _arg2) \
281 ((((u64) _arg1) & 0xffffffff) | \
282 ((((u64) _arg2) & 0xffffffff) << 32))
283
284#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff)
285#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff)
286
287
288
289/*
290 * Define a Get/Put value pair (pointers) used with a message queue.
291 */
292struct xpc_gp {
293 volatile s64 get; /* Get value */
294 volatile s64 put; /* Put value */
295};
296
297#define XPC_GP_SIZE \
298 L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
299
300
301
302/*
303 * Define a structure that contains arguments associated with opening and
304 * closing a channel.
305 */
306struct xpc_openclose_args {
307 u16 reason; /* reason why channel is closing */
308 u16 msg_size; /* sizeof each message entry */
309 u16 remote_nentries; /* #of message entries in remote msg queue */
310 u16 local_nentries; /* #of message entries in local msg queue */
311 u64 local_msgqueue_pa; /* physical address of local message queue */
312};
313
314#define XPC_OPENCLOSE_ARGS_SIZE \
315 L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
316
317
318
319/* struct xpc_msg flags */
320
321#define XPC_M_DONE 0x01 /* msg has been received/consumed */
322#define XPC_M_READY 0x02 /* msg is ready to be sent */
323#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */
324
325
326#define XPC_MSG_ADDRESS(_payload) \
327 ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
328
329
330
331/*
332 * Defines notify entry.
333 *
334 * This is used to notify a message's sender that their message was received
335 * and consumed by the intended recipient.
336 */
337struct xpc_notify {
338 struct semaphore sema; /* notify semaphore */
339 volatile u8 type; /* type of notification */
340
341 /* the following two fields are only used if type == XPC_N_CALL */
342 xpc_notify_func func; /* user's notify function */
343 void *key; /* pointer to user's key */
344};
345
346/* struct xpc_notify type of notification */
347
348#define XPC_N_CALL 0x01 /* notify function provided by user */
349
350
351
352/*
353 * Define the structure that manages all the stuff required by a channel. In
354 * particular, they are used to manage the messages sent across the channel.
355 *
356 * This structure is private to a partition, and is NOT shared across the
357 * partition boundary.
358 *
359 * There is an array of these structures for each remote partition. It is
360 * allocated at the time a partition becomes active. The array contains one
361 * of these structures for each potential channel connection to that partition.
362 *
363 * Each of these structures manages two message queues (circular buffers).
364 * They are allocated at the time a channel connection is made. One of
365 * these message queues (local_msgqueue) holds the locally created messages
366 * that are destined for the remote partition. The other of these message
367 * queues (remote_msgqueue) is a locally cached copy of the remote partition's
368 * own local_msgqueue.
369 *
370 * The following is a description of the Get/Put pointers used to manage these
371 * two message queues. Consider the local_msgqueue to be on one partition
372 * and the remote_msgqueue to be its cached copy on another partition. A
373 * description of what each of the lettered areas contains is included.
374 *
375 *
376 * local_msgqueue remote_msgqueue
377 *
378 * |/////////| |/////////|
379 * w_remote_GP.get --> +---------+ |/////////|
380 * | F | |/////////|
381 * remote_GP.get --> +---------+ +---------+ <-- local_GP->get
382 * | | | |
383 * | | | E |
384 * | | | |
385 * | | +---------+ <-- w_local_GP.get
386 * | B | |/////////|
387 * | | |////D////|
388 * | | |/////////|
389 * | | +---------+ <-- w_remote_GP.put
390 * | | |////C////|
391 * local_GP->put --> +---------+ +---------+ <-- remote_GP.put
392 * | | |/////////|
393 * | A | |/////////|
394 * | | |/////////|
395 * w_local_GP.put --> +---------+ |/////////|
396 * |/////////| |/////////|
397 *
398 *
399 * ( remote_GP.[get|put] are cached copies of the remote
400 * partition's local_GP->[get|put], and thus their values can
401 * lag behind their counterparts on the remote partition. )
402 *
403 *
404 * A - Messages that have been allocated, but have not yet been sent to the
405 * remote partition.
406 *
407 * B - Messages that have been sent, but have not yet been acknowledged by the
408 * remote partition as having been received.
409 *
410 * C - Area that needs to be prepared for the copying of sent messages, by
411 * the clearing of the message flags of any previously received messages.
412 *
413 * D - Area into which sent messages are to be copied from the remote
414 * partition's local_msgqueue and then delivered to their intended
415 * recipients. [ To allow for a multi-message copy, another pointer
416 * (next_msg_to_pull) has been added to keep track of the next message
417 * number needing to be copied (pulled). It chases after w_remote_GP.put.
418 * Any messages lying between w_local_GP.get and next_msg_to_pull have
419 * been copied and are ready to be delivered. ]
420 *
421 * E - Messages that have been copied and delivered, but have not yet been
422 * acknowledged by the recipient as having been received.
423 *
424 * F - Messages that have been acknowledged, but XPC has not yet notified the
425 * sender that the message was received by its intended recipient.
426 * This is also an area that needs to be prepared for the allocating of
427 * new messages, by the clearing of the message flags of the acknowledged
428 * messages.
429 */
430struct xpc_channel {
431 partid_t partid; /* ID of remote partition connected */
432 spinlock_t lock; /* lock for updating this structure */
433 u32 flags; /* general flags */
434
435 enum xpc_retval reason; /* reason why channel is disconnect'g */
436 int reason_line; /* line# disconnect initiated from */
437
438 u16 number; /* channel # */
439
440 u16 msg_size; /* sizeof each msg entry */
441 u16 local_nentries; /* #of msg entries in local msg queue */
442 u16 remote_nentries; /* #of msg entries in remote msg queue*/
443
444 void *local_msgqueue_base; /* base address of kmalloc'd space */
445 struct xpc_msg *local_msgqueue; /* local message queue */
446 void *remote_msgqueue_base; /* base address of kmalloc'd space */
447 struct xpc_msg *remote_msgqueue;/* cached copy of remote partition's */
448 /* local message queue */
449 u64 remote_msgqueue_pa; /* phys addr of remote partition's */
450 /* local message queue */
451
452 atomic_t references; /* #of external references to queues */
453
454 atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
455 wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
456
457 u8 delayed_IPI_flags; /* IPI flags received, but delayed */
458 /* action until channel disconnected */
459
460 /* queue of msg senders who want to be notified when msg received */
461
462 atomic_t n_to_notify; /* #of msg senders to notify */
463 struct xpc_notify *notify_queue;/* notify queue for messages sent */
464
465 xpc_channel_func func; /* user's channel function */
466 void *key; /* pointer to user's key */
467
468 struct semaphore msg_to_pull_sema; /* next msg to pull serialization */
469 struct semaphore wdisconnect_sema; /* wait for channel disconnect */
470
471 struct xpc_openclose_args *local_openclose_args; /* args passed on */
472 /* opening or closing of channel */
473
474 /* various flavors of local and remote Get/Put values */
475
476 struct xpc_gp *local_GP; /* local Get/Put values */
477 struct xpc_gp remote_GP; /* remote Get/Put values */
478 struct xpc_gp w_local_GP; /* working local Get/Put values */
479 struct xpc_gp w_remote_GP; /* working remote Get/Put values */
480 s64 next_msg_to_pull; /* Put value of next msg to pull */
481
482 /* kthread management related fields */
483
484// >>> rethink having kthreads_assigned_limit and kthreads_idle_limit; perhaps
485// >>> allow the assigned limit be unbounded and let the idle limit be dynamic
486// >>> dependent on activity over the last interval of time
487 atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
488 u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */
489 atomic_t kthreads_idle; /* #of kthreads idle waiting for work */
490 u32 kthreads_idle_limit; /* limit on #of kthreads idle */
491 atomic_t kthreads_active; /* #of kthreads actively working */
492 // >>> following field is temporary
493 u32 kthreads_created; /* total #of kthreads created */
494
495 wait_queue_head_t idle_wq; /* idle kthread wait queue */
496
497} ____cacheline_aligned;
498
499
500/* struct xpc_channel flags */
501
502#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */
503
504#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */
505#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */
506#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */
507#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */
508
509#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */
510#define XPC_C_CONNECTCALLOUT 0x00000040 /* channel connected callout made */
511#define XPC_C_CONNECTED 0x00000080 /* local channel is connected */
512#define XPC_C_CONNECTING 0x00000100 /* channel is being connected */
513
514#define XPC_C_RCLOSEREPLY 0x00000200 /* remote close channel reply */
515#define XPC_C_CLOSEREPLY 0x00000400 /* local close channel reply */
516#define XPC_C_RCLOSEREQUEST 0x00000800 /* remote close channel request */
517#define XPC_C_CLOSEREQUEST 0x00001000 /* local close channel request */
518
519#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */
520#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */
521#define XPC_C_DISCONNECTCALLOUT 0x00008000 /* chan disconnected callout made */
522#define XPC_C_WDISCONNECT 0x00010000 /* waiting for channel disconnect */
523
524
525
526/*
527 * Manages channels on a partition basis. There is one of these structures
528 * for each partition (a partition will never utilize the structure that
529 * represents itself).
530 */
531struct xpc_partition {
532
533 /* XPC HB infrastructure */
534
535 u8 remote_rp_version; /* version# of partition's rsvd pg */
536 struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */
537 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
538 u64 remote_vars_pa; /* phys addr of partition's vars */
539 u64 remote_vars_part_pa; /* phys addr of partition's vars part */
540 u64 last_heartbeat; /* HB at last read */
541 u64 remote_amos_page_pa; /* phys addr of partition's amos page */
542 int remote_act_nasid; /* active part's act/deact nasid */
543 int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */
544 u32 act_IRQ_rcvd; /* IRQs since activation */
545 spinlock_t act_lock; /* protect updating of act_state */
546 u8 act_state; /* from XPC HB viewpoint */
547 u8 remote_vars_version; /* version# of partition's vars */
548 enum xpc_retval reason; /* reason partition is deactivating */
549 int reason_line; /* line# deactivation initiated from */
550 int reactivate_nasid; /* nasid in partition to reactivate */
551
552 unsigned long disengage_request_timeout; /* timeout in jiffies */
553 struct timer_list disengage_request_timer;
554
555
556 /* XPC infrastructure referencing and teardown control */
557
558 volatile u8 setup_state; /* infrastructure setup state */
559 wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
560 atomic_t references; /* #of references to infrastructure */
561
562
563 /*
564 * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
565 * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
566 * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
567 * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
568 */
569
570
571 u8 nchannels; /* #of defined channels supported */
572 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
573 atomic_t nchannels_engaged;/* #of channels engaged with remote part */
574 struct xpc_channel *channels;/* array of channel structures */
575
576 void *local_GPs_base; /* base address of kmalloc'd space */
577 struct xpc_gp *local_GPs; /* local Get/Put values */
578 void *remote_GPs_base; /* base address of kmalloc'd space */
579 struct xpc_gp *remote_GPs;/* copy of remote partition's local Get/Put */
580 /* values */
581 u64 remote_GPs_pa; /* phys address of remote partition's local */
582 /* Get/Put values */
583
584
585 /* fields used to pass args when opening or closing a channel */
586
587 void *local_openclose_args_base; /* base address of kmalloc'd space */
588 struct xpc_openclose_args *local_openclose_args; /* local's args */
589 void *remote_openclose_args_base; /* base address of kmalloc'd space */
590 struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
591 /* args */
592 u64 remote_openclose_args_pa; /* phys addr of remote's args */
593
594
595 /* IPI sending, receiving and handling related fields */
596
597 int remote_IPI_nasid; /* nasid of where to send IPIs */
598 int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */
599 AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */
600
601 AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */
602 u64 local_IPI_amo; /* IPI amo flags yet to be handled */
603 char IPI_owner[8]; /* IPI owner's name */
604 struct timer_list dropped_IPI_timer; /* dropped IPI timer */
605
606 spinlock_t IPI_lock; /* IPI handler lock */
607
608
609 /* channel manager related fields */
610
611 atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
612 wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
613
614} ____cacheline_aligned;
615
616
617/* struct xpc_partition act_state values (for XPC HB) */
618
619#define XPC_P_INACTIVE 0x00 /* partition is not active */
620#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */
621#define XPC_P_ACTIVATING 0x02 /* activation thread started */
622#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */
623#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */
624
625
626#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
627 xpc_deactivate_partition(__LINE__, (_p), (_reason))
628
629
630/* struct xpc_partition setup_state values */
631
632#define XPC_P_UNSET 0x00 /* infrastructure was never setup */
633#define XPC_P_SETUP 0x01 /* infrastructure is setup */
634#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
635#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
636
637
638
639/*
640 * struct xpc_partition IPI_timer #of seconds to wait before checking for
641 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
642 * after the IPI was received.
643 */
644#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
645
646
647/* number of seconds to wait for other partitions to disengage */
648#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90
649
650/* interval in seconds to print 'waiting disengagement' messages */
651#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
652
653
654#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
655
656
657
658/* found in xp_main.c */
659extern struct xpc_registration xpc_registrations[];
660
661
662/* found in xpc_main.c */
663extern struct device *xpc_part;
664extern struct device *xpc_chan;
665extern int xpc_disengage_request_timelimit;
666extern irqreturn_t xpc_notify_IRQ_handler(int, void *, struct pt_regs *);
667extern void xpc_dropped_IPI_check(struct xpc_partition *);
668extern void xpc_activate_partition(struct xpc_partition *);
669extern void xpc_activate_kthreads(struct xpc_channel *, int);
670extern void xpc_create_kthreads(struct xpc_channel *, int);
671extern void xpc_disconnect_wait(int);
672
673
674/* found in xpc_partition.c */
675extern int xpc_exiting;
676extern struct xpc_vars *xpc_vars;
677extern struct xpc_rsvd_page *xpc_rsvd_page;
678extern struct xpc_vars_part *xpc_vars_part;
679extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
680extern char xpc_remote_copy_buffer[];
681extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
682extern void xpc_allow_IPI_ops(void);
683extern void xpc_restrict_IPI_ops(void);
684extern int xpc_identify_act_IRQ_sender(void);
685extern int xpc_partition_disengaged(struct xpc_partition *);
686extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
687extern void xpc_mark_partition_inactive(struct xpc_partition *);
688extern void xpc_discovery(void);
689extern void xpc_check_remote_hb(void);
690extern void xpc_deactivate_partition(const int, struct xpc_partition *,
691 enum xpc_retval);
692extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *);
693
694
695/* found in xpc_channel.c */
696extern void xpc_initiate_connect(int);
697extern void xpc_initiate_disconnect(int);
698extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **);
699extern enum xpc_retval xpc_initiate_send(partid_t, int, void *);
700extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *,
701 xpc_notify_func, void *);
702extern void xpc_initiate_received(partid_t, int, void *);
703extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *);
704extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *);
705extern void xpc_process_channel_activity(struct xpc_partition *);
706extern void xpc_connected_callout(struct xpc_channel *);
707extern void xpc_deliver_msg(struct xpc_channel *);
708extern void xpc_disconnect_channel(const int, struct xpc_channel *,
709 enum xpc_retval, unsigned long *);
710extern void xpc_disconnecting_callout(struct xpc_channel *);
711extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
712extern void xpc_teardown_infrastructure(struct xpc_partition *);
713
714
715
716static inline void
717xpc_wakeup_channel_mgr(struct xpc_partition *part)
718{
719 if (atomic_inc_return(&part->channel_mgr_requests) == 1) {
720 wake_up(&part->channel_mgr_wq);
721 }
722}
723
724
725
726/*
727 * These next two inlines are used to keep us from tearing down a channel's
728 * msg queues while a thread may be referencing them.
729 */
730static inline void
731xpc_msgqueue_ref(struct xpc_channel *ch)
732{
733 atomic_inc(&ch->references);
734}
735
736static inline void
737xpc_msgqueue_deref(struct xpc_channel *ch)
738{
739 s32 refs = atomic_dec_return(&ch->references);
740
741 DBUG_ON(refs < 0);
742 if (refs == 0) {
743 xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
744 }
745}
746
747
748
/*
 * Convenience wrapper around xpc_disconnect_channel() that supplies the
 * source line of the call site (__LINE__) as the first argument.
 */
#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
	xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
751
752
753/*
754 * These two inlines are used to keep us from tearing down a partition's
755 * setup infrastructure while a thread may be referencing it.
756 */
757static inline void
758xpc_part_deref(struct xpc_partition *part)
759{
760 s32 refs = atomic_dec_return(&part->references);
761
762
763 DBUG_ON(refs < 0);
764 if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) {
765 wake_up(&part->teardown_wq);
766 }
767}
768
769static inline int
770xpc_part_ref(struct xpc_partition *part)
771{
772 int setup;
773
774
775 atomic_inc(&part->references);
776 setup = (part->setup_state == XPC_P_SETUP);
777 if (!setup) {
778 xpc_part_deref(part);
779 }
780 return setup;
781}
782
783
784
/*
 * The following macro is to be used for the setting of the reason and
 * reason_line fields in both the struct xpc_channel and struct xpc_partition
 * structures.
 *
 *	_p	pointer to a structure having reason and reason_line members
 *	_reason	value stored in (_p)->reason
 *	_line	value stored in (_p)->reason_line
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (e.g. as the unbraced body of an if that is followed by an
 * else), and the value arguments are parenthesized so that arbitrary
 * expressions can be passed safely.
 */
#define XPC_SET_REASON(_p, _reason, _line) \
	do { \
		(_p)->reason = (_reason); \
		(_p)->reason_line = (_line); \
	} while (0)
795
796
797
798/*
799 * This next set of inlines are used to keep track of when a partition is
800 * potentially engaged in accessing memory belonging to another partition.
801 */
802
803static inline void
804xpc_mark_partition_engaged(struct xpc_partition *part)
805{
806 unsigned long irq_flags;
807 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
808 (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
809
810
811 local_irq_save(irq_flags);
812
813 /* set bit corresponding to our partid in remote partition's AMO */
814 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
815 (1UL << sn_partition_id));
816 /*
817 * We must always use the nofault function regardless of whether we
818 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
819 * didn't, we'd never know that the other partition is down and would
820 * keep sending IPIs and AMOs to it until the heartbeat times out.
821 */
822 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
823 variable), xp_nofault_PIOR_target));
824
825 local_irq_restore(irq_flags);
826}
827
828static inline void
829xpc_mark_partition_disengaged(struct xpc_partition *part)
830{
831 unsigned long irq_flags;
832 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
833 (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
834
835
836 local_irq_save(irq_flags);
837
838 /* clear bit corresponding to our partid in remote partition's AMO */
839 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
840 ~(1UL << sn_partition_id));
841 /*
842 * We must always use the nofault function regardless of whether we
843 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
844 * didn't, we'd never know that the other partition is down and would
845 * keep sending IPIs and AMOs to it until the heartbeat times out.
846 */
847 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
848 variable), xp_nofault_PIOR_target));
849
850 local_irq_restore(irq_flags);
851}
852
853static inline void
854xpc_request_partition_disengage(struct xpc_partition *part)
855{
856 unsigned long irq_flags;
857 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
858 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
859
860
861 local_irq_save(irq_flags);
862
863 /* set bit corresponding to our partid in remote partition's AMO */
864 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
865 (1UL << sn_partition_id));
866 /*
867 * We must always use the nofault function regardless of whether we
868 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
869 * didn't, we'd never know that the other partition is down and would
870 * keep sending IPIs and AMOs to it until the heartbeat times out.
871 */
872 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
873 variable), xp_nofault_PIOR_target));
874
875 local_irq_restore(irq_flags);
876}
877
878static inline void
879xpc_cancel_partition_disengage_request(struct xpc_partition *part)
880{
881 unsigned long irq_flags;
882 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
883 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
884
885
886 local_irq_save(irq_flags);
887
888 /* clear bit corresponding to our partid in remote partition's AMO */
889 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
890 ~(1UL << sn_partition_id));
891 /*
892 * We must always use the nofault function regardless of whether we
893 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
894 * didn't, we'd never know that the other partition is down and would
895 * keep sending IPIs and AMOs to it until the heartbeat times out.
896 */
897 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
898 variable), xp_nofault_PIOR_target));
899
900 local_irq_restore(irq_flags);
901}
902
903static inline u64
904xpc_partition_engaged(u64 partid_mask)
905{
906 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
907
908
909 /* return our partition's AMO variable ANDed with partid_mask */
910 return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
911 partid_mask);
912}
913
914static inline u64
915xpc_partition_disengage_requested(u64 partid_mask)
916{
917 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
918
919
920 /* return our partition's AMO variable ANDed with partid_mask */
921 return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
922 partid_mask);
923}
924
925static inline void
926xpc_clear_partition_engaged(u64 partid_mask)
927{
928 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
929
930
931 /* clear bit(s) based on partid_mask in our partition's AMO */
932 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
933 ~partid_mask);
934}
935
936static inline void
937xpc_clear_partition_disengage_request(u64 partid_mask)
938{
939 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
940
941
942 /* clear bit(s) based on partid_mask in our partition's AMO */
943 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
944 ~partid_mask);
945}
946
947
948
949/*
950 * The following set of macros and inlines are used for the sending and
951 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
952 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
953 * the other that is associated with channel activity (SGI_XPC_NOTIFY).
954 */
955
956static inline u64
957xpc_IPI_receive(AMO_t *amo)
958{
959 return FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_CLEAR);
960}
961
962
963static inline enum xpc_retval
964xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
965{
966 int ret = 0;
967 unsigned long irq_flags;
968
969
970 local_irq_save(irq_flags);
971
972 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, flag);
973 sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
974
975 /*
976 * We must always use the nofault function regardless of whether we
977 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
978 * didn't, we'd never know that the other partition is down and would
979 * keep sending IPIs and AMOs to it until the heartbeat times out.
980 */
981 ret = xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
982 xp_nofault_PIOR_target));
983
984 local_irq_restore(irq_flags);
985
986 return ((ret == 0) ? xpcSuccess : xpcPioReadError);
987}
988
989
990/*
991 * IPIs associated with SGI_XPC_ACTIVATE IRQ.
992 */
993
994/*
995 * Flag the appropriate AMO variable and send an IPI to the specified node.
996 */
997static inline void
998xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
999 int to_phys_cpuid)
1000{
1001 int w_index = XPC_NASID_W_INDEX(from_nasid);
1002 int b_index = XPC_NASID_B_INDEX(from_nasid);
1003 AMO_t *amos = (AMO_t *) __va(amos_page_pa +
1004 (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
1005
1006
1007 (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
1008 to_phys_cpuid, SGI_XPC_ACTIVATE);
1009}
1010
1011static inline void
1012xpc_IPI_send_activate(struct xpc_vars *vars)
1013{
1014 xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
1015 vars->act_nasid, vars->act_phys_cpuid);
1016}
1017
1018static inline void
1019xpc_IPI_send_activated(struct xpc_partition *part)
1020{
1021 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
1022 part->remote_act_nasid, part->remote_act_phys_cpuid);
1023}
1024
1025static inline void
1026xpc_IPI_send_reactivate(struct xpc_partition *part)
1027{
1028 xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
1029 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
1030}
1031
1032static inline void
1033xpc_IPI_send_disengage(struct xpc_partition *part)
1034{
1035 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
1036 part->remote_act_nasid, part->remote_act_phys_cpuid);
1037}
1038
1039
1040/*
1041 * IPIs associated with SGI_XPC_NOTIFY IRQ.
1042 */
1043
1044/*
1045 * Send an IPI to the remote partition that is associated with the
1046 * specified channel.
1047 */
1048#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
1049 xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
1050
1051static inline void
1052xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
1053 unsigned long *irq_flags)
1054{
1055 struct xpc_partition *part = &xpc_partitions[ch->partid];
1056 enum xpc_retval ret;
1057
1058
1059 if (likely(part->act_state != XPC_P_DEACTIVATING)) {
1060 ret = xpc_IPI_send(part->remote_IPI_amo_va,
1061 (u64) ipi_flag << (ch->number * 8),
1062 part->remote_IPI_nasid,
1063 part->remote_IPI_phys_cpuid,
1064 SGI_XPC_NOTIFY);
1065 dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
1066 ipi_flag_string, ch->partid, ch->number, ret);
1067 if (unlikely(ret != xpcSuccess)) {
1068 if (irq_flags != NULL) {
1069 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1070 }
1071 XPC_DEACTIVATE_PARTITION(part, ret);
1072 if (irq_flags != NULL) {
1073 spin_lock_irqsave(&ch->lock, *irq_flags);
1074 }
1075 }
1076 }
1077}
1078
1079
1080/*
1081 * Make it look like the remote partition, which is associated with the
1082 * specified channel, sent us an IPI. This faked IPI will be handled
1083 * by xpc_dropped_IPI_check().
1084 */
1085#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
1086 xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
1087
1088static inline void
1089xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
1090 char *ipi_flag_string)
1091{
1092 struct xpc_partition *part = &xpc_partitions[ch->partid];
1093
1094
1095 FETCHOP_STORE_OP(TO_AMO((u64) &part->local_IPI_amo_va->variable),
1096 FETCHOP_OR, ((u64) ipi_flag << (ch->number * 8)));
1097 dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
1098 ipi_flag_string, ch->partid, ch->number);
1099}
1100
1101
1102/*
1103 * The sending and receiving of IPIs includes the setting of an AMO variable
1104 * to indicate the reason the IPI was sent. The 64-bit variable is divided
1105 * up into eight bytes, ordered from right to left. Byte zero pertains to
1106 * channel 0, byte one to channel 1, and so on. Each byte is described by
1107 * the following IPI flags.
1108 */
1109
/* per-channel IPI flags; each channel owns one byte of the AMO variable */
#define	XPC_IPI_CLOSEREQUEST	0x01
#define	XPC_IPI_CLOSEREPLY	0x02
#define	XPC_IPI_OPENREQUEST	0x04
#define	XPC_IPI_OPENREPLY	0x08
#define	XPC_IPI_MSGREQUEST	0x10


/*
 * Given an AMO variable and a channel#, get/set its associated IPI flags.
 * XPC_SET_IPI_FLAGS() ORs the flags _f into channel _c's byte of _amo (which
 * must be an lvalue); the whole expansion is parenthesized so it remains a
 * single expression wherever it is used.
 */
#define XPC_GET_IPI_FLAGS(_amo, _c)	((u8) (((_amo) >> ((_c) * 8)) & 0xff))
#define XPC_SET_IPI_FLAGS(_amo, _c, _f)	((_amo) |= ((u64) (_f) << ((_c) * 8)))

/*
 * Non-zero if any channel's byte has one of the open/close flags (low
 * nibble) or the msg request flag (0x10) set, respectively. The masks are
 * explicitly unsigned long since they cover all 64 bits of the AMO.
 */
#define	XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo)	((_amo) & 0x0f0f0f0f0f0f0f0fUL)
#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo)		((_amo) & 0x1010101010101010UL)
1123
1124
1125static inline void
1126xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
1127{
1128 struct xpc_openclose_args *args = ch->local_openclose_args;
1129
1130
1131 args->reason = ch->reason;
1132
1133 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
1134}
1135
1136static inline void
1137xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
1138{
1139 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
1140}
1141
1142static inline void
1143xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
1144{
1145 struct xpc_openclose_args *args = ch->local_openclose_args;
1146
1147
1148 args->msg_size = ch->msg_size;
1149 args->local_nentries = ch->local_nentries;
1150
1151 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
1152}
1153
1154static inline void
1155xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
1156{
1157 struct xpc_openclose_args *args = ch->local_openclose_args;
1158
1159
1160 args->remote_nentries = ch->remote_nentries;
1161 args->local_nentries = ch->local_nentries;
1162 args->local_msgqueue_pa = __pa(ch->local_msgqueue);
1163
1164 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
1165}
1166
1167static inline void
1168xpc_IPI_send_msgrequest(struct xpc_channel *ch)
1169{
1170 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
1171}
1172
1173static inline void
1174xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
1175{
1176 XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
1177}
1178
1179
1180/*
1181 * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
1182 * pages are located in the lowest granule. The lowest granule uses 4k pages
1183 * for cached references and an alternate TLB handler to never provide a
1184 * cacheable mapping for the entire region. This will prevent speculative
1185 * reading of cached copies of our lines from being issued which will cause
1186 * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
1187 * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
1188 * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
1189 * activation and 2 AMO variables for partition deactivation.
1190 */
1191static inline AMO_t *
1192xpc_IPI_init(int index)
1193{
1194 AMO_t *amo = xpc_vars->amos_page + index;
1195
1196
1197 (void) xpc_IPI_receive(amo); /* clear AMO variable */
1198 return amo;
1199}
1200
1201
1202
1203static inline enum xpc_retval
1204xpc_map_bte_errors(bte_result_t error)
1205{
1206 switch (error) {
1207 case BTE_SUCCESS: return xpcSuccess;
1208 case BTEFAIL_DIR: return xpcBteDirectoryError;
1209 case BTEFAIL_POISON: return xpcBtePoisonError;
1210 case BTEFAIL_WERR: return xpcBteWriteError;
1211 case BTEFAIL_ACCESS: return xpcBteAccessError;
1212 case BTEFAIL_PWERR: return xpcBtePWriteError;
1213 case BTEFAIL_PRERR: return xpcBtePReadError;
1214 case BTEFAIL_TOUT: return xpcBteTimeOutError;
1215 case BTEFAIL_XTERR: return xpcBteXtalkError;
1216 case BTEFAIL_NOTAVAIL: return xpcBteNotAvailable;
1217 default: return xpcBteUnmappedError;
1218 }
1219}
1220
1221
1222
1223static inline void *
1224xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
1225{
1226 /* see if kmalloc will give us cachline aligned memory by default */
1227 *base = kmalloc(size, flags);
1228 if (*base == NULL) {
1229 return NULL;
1230 }
1231 if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
1232 return *base;
1233 }
1234 kfree(*base);
1235
1236 /* nope, we'll have to do it ourselves */
1237 *base = kmalloc(size + L1_CACHE_BYTES, flags);
1238 if (*base == NULL) {
1239 return NULL;
1240 }
1241 return (void *) L1_CACHE_ALIGN((u64) *base);
1242}
1243
1244
1245/*
1246 * Check to see if there is any channel activity to/from the specified
1247 * partition.
1248 */
1249static inline void
1250xpc_check_for_channel_activity(struct xpc_partition *part)
1251{
1252 u64 IPI_amo;
1253 unsigned long irq_flags;
1254
1255
1256 IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
1257 if (IPI_amo == 0) {
1258 return;
1259 }
1260
1261 spin_lock_irqsave(&part->IPI_lock, irq_flags);
1262 part->local_IPI_amo |= IPI_amo;
1263 spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
1264
1265 dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
1266 XPC_PARTID(part), IPI_amo);
1267
1268 xpc_wakeup_channel_mgr(part);
1269}
1270
1271
1272#endif /* _IA64_SN_KERNEL_XPC_H */
1273
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index abf4fc2a87bb..0c0a68902409 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -3,7 +3,7 @@
3 * License. See the file "COPYING" in the main directory of this archive 3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details. 4 * for more details.
5 * 5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. 6 * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
7 */ 7 */
8 8
9 9
@@ -24,7 +24,7 @@
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <asm/sn/bte.h> 25#include <asm/sn/bte.h>
26#include <asm/sn/sn_sal.h> 26#include <asm/sn/sn_sal.h>
27#include "xpc.h" 27#include <asm/sn/xpc.h>
28 28
29 29
30/* 30/*
@@ -779,6 +779,12 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
779 779
780 /* both sides are disconnected now */ 780 /* both sides are disconnected now */
781 781
782 if (ch->flags & XPC_C_CONNECTCALLOUT) {
783 spin_unlock_irqrestore(&ch->lock, *irq_flags);
784 xpc_disconnect_callout(ch, xpcDisconnected);
785 spin_lock_irqsave(&ch->lock, *irq_flags);
786 }
787
782 /* it's now safe to free the channel's message queues */ 788 /* it's now safe to free the channel's message queues */
783 xpc_free_msgqueues(ch); 789 xpc_free_msgqueues(ch);
784 790
@@ -1645,7 +1651,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
1645 1651
1646 1652
1647void 1653void
1648xpc_disconnecting_callout(struct xpc_channel *ch) 1654xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
1649{ 1655{
1650 /* 1656 /*
1651 * Let the channel's registerer know that the channel is being 1657 * Let the channel's registerer know that the channel is being
@@ -1654,15 +1660,13 @@ xpc_disconnecting_callout(struct xpc_channel *ch)
1654 */ 1660 */
1655 1661
1656 if (ch->func != NULL) { 1662 if (ch->func != NULL) {
1657 dev_dbg(xpc_chan, "ch->func() called, reason=xpcDisconnecting," 1663 dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
1658 " partid=%d, channel=%d\n", ch->partid, ch->number); 1664 "channel=%d\n", reason, ch->partid, ch->number);
1659 1665
1660 ch->func(xpcDisconnecting, ch->partid, ch->number, NULL, 1666 ch->func(reason, ch->partid, ch->number, NULL, ch->key);
1661 ch->key);
1662 1667
1663 dev_dbg(xpc_chan, "ch->func() returned, reason=" 1668 dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
1664 "xpcDisconnecting, partid=%d, channel=%d\n", 1669 "channel=%d\n", reason, ch->partid, ch->number);
1665 ch->partid, ch->number);
1666 } 1670 }
1667} 1671}
1668 1672
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index b617236524c6..8930586e0eb4 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -3,7 +3,7 @@
3 * License. See the file "COPYING" in the main directory of this archive 3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details. 4 * for more details.
5 * 5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. 6 * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
7 */ 7 */
8 8
9 9
@@ -59,7 +59,7 @@
59#include <asm/sn/sn_sal.h> 59#include <asm/sn/sn_sal.h>
60#include <asm/kdebug.h> 60#include <asm/kdebug.h>
61#include <asm/uaccess.h> 61#include <asm/uaccess.h>
62#include "xpc.h" 62#include <asm/sn/xpc.h>
63 63
64 64
65/* define two XPC debug device structures to be used with dev_dbg() et al */ 65/* define two XPC debug device structures to be used with dev_dbg() et al */
@@ -82,6 +82,9 @@ struct device *xpc_part = &xpc_part_dbg_subname;
82struct device *xpc_chan = &xpc_chan_dbg_subname; 82struct device *xpc_chan = &xpc_chan_dbg_subname;
83 83
84 84
85static int xpc_kdebug_ignore;
86
87
85/* systune related variables for /proc/sys directories */ 88/* systune related variables for /proc/sys directories */
86 89
87static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL; 90static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
@@ -162,6 +165,8 @@ static ctl_table xpc_sys_dir[] = {
162}; 165};
163static struct ctl_table_header *xpc_sysctl; 166static struct ctl_table_header *xpc_sysctl;
164 167
168/* non-zero if any remote partition disengage request was timed out */
169int xpc_disengage_request_timedout;
165 170
166/* #of IRQs received */ 171/* #of IRQs received */
167static atomic_t xpc_act_IRQ_rcvd; 172static atomic_t xpc_act_IRQ_rcvd;
@@ -773,7 +778,7 @@ xpc_daemonize_kthread(void *args)
773 ch->flags |= XPC_C_DISCONNECTCALLOUT; 778 ch->flags |= XPC_C_DISCONNECTCALLOUT;
774 spin_unlock_irqrestore(&ch->lock, irq_flags); 779 spin_unlock_irqrestore(&ch->lock, irq_flags);
775 780
776 xpc_disconnecting_callout(ch); 781 xpc_disconnect_callout(ch, xpcDisconnecting);
777 } else { 782 } else {
778 spin_unlock_irqrestore(&ch->lock, irq_flags); 783 spin_unlock_irqrestore(&ch->lock, irq_flags);
779 } 784 }
@@ -921,9 +926,9 @@ static void
921xpc_do_exit(enum xpc_retval reason) 926xpc_do_exit(enum xpc_retval reason)
922{ 927{
923 partid_t partid; 928 partid_t partid;
924 int active_part_count; 929 int active_part_count, printed_waiting_msg = 0;
925 struct xpc_partition *part; 930 struct xpc_partition *part;
926 unsigned long printmsg_time; 931 unsigned long printmsg_time, disengage_request_timeout = 0;
927 932
928 933
929 /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */ 934 /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
@@ -953,7 +958,8 @@ xpc_do_exit(enum xpc_retval reason)
953 958
954 /* wait for all partitions to become inactive */ 959 /* wait for all partitions to become inactive */
955 960
956 printmsg_time = jiffies; 961 printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
962 xpc_disengage_request_timedout = 0;
957 963
958 do { 964 do {
959 active_part_count = 0; 965 active_part_count = 0;
@@ -969,20 +975,39 @@ xpc_do_exit(enum xpc_retval reason)
969 active_part_count++; 975 active_part_count++;
970 976
971 XPC_DEACTIVATE_PARTITION(part, reason); 977 XPC_DEACTIVATE_PARTITION(part, reason);
972 }
973 978
974 if (active_part_count == 0) { 979 if (part->disengage_request_timeout >
975 break; 980 disengage_request_timeout) {
981 disengage_request_timeout =
982 part->disengage_request_timeout;
983 }
976 } 984 }
977 985
978 if (jiffies >= printmsg_time) { 986 if (xpc_partition_engaged(-1UL)) {
979 dev_info(xpc_part, "waiting for partitions to " 987 if (time_after(jiffies, printmsg_time)) {
980 "deactivate/disengage, active count=%d, remote " 988 dev_info(xpc_part, "waiting for remote "
981 "engaged=0x%lx\n", active_part_count, 989 "partitions to disengage, timeout in "
982 xpc_partition_engaged(1UL << partid)); 990 "%ld seconds\n",
983 991 (disengage_request_timeout - jiffies)
984 printmsg_time = jiffies + 992 / HZ);
993 printmsg_time = jiffies +
985 (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ); 994 (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
995 printed_waiting_msg = 1;
996 }
997
998 } else if (active_part_count > 0) {
999 if (printed_waiting_msg) {
1000 dev_info(xpc_part, "waiting for local partition"
1001 " to disengage\n");
1002 printed_waiting_msg = 0;
1003 }
1004
1005 } else {
1006 if (!xpc_disengage_request_timedout) {
1007 dev_info(xpc_part, "all partitions have "
1008 "disengaged\n");
1009 }
1010 break;
986 } 1011 }
987 1012
988 /* sleep for a 1/3 of a second or so */ 1013 /* sleep for a 1/3 of a second or so */
@@ -1000,11 +1025,13 @@ xpc_do_exit(enum xpc_retval reason)
1000 del_timer_sync(&xpc_hb_timer); 1025 del_timer_sync(&xpc_hb_timer);
1001 DBUG_ON(xpc_vars->heartbeating_to_mask != 0); 1026 DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
1002 1027
1003 /* take ourselves off of the reboot_notifier_list */ 1028 if (reason == xpcUnloading) {
1004 (void) unregister_reboot_notifier(&xpc_reboot_notifier); 1029 /* take ourselves off of the reboot_notifier_list */
1030 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
1005 1031
1006 /* take ourselves off of the die_notifier list */ 1032 /* take ourselves off of the die_notifier list */
1007 (void) unregister_die_notifier(&xpc_die_notifier); 1033 (void) unregister_die_notifier(&xpc_die_notifier);
1034 }
1008 1035
1009 /* close down protections for IPI operations */ 1036 /* close down protections for IPI operations */
1010 xpc_restrict_IPI_ops(); 1037 xpc_restrict_IPI_ops();
@@ -1020,7 +1047,35 @@ xpc_do_exit(enum xpc_retval reason)
1020 1047
1021 1048
1022/* 1049/*
1023 * Called when the system is about to be either restarted or halted. 1050 * This function is called when the system is being rebooted.
1051 */
1052static int
1053xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
1054{
1055 enum xpc_retval reason;
1056
1057
1058 switch (event) {
1059 case SYS_RESTART:
1060 reason = xpcSystemReboot;
1061 break;
1062 case SYS_HALT:
1063 reason = xpcSystemHalt;
1064 break;
1065 case SYS_POWER_OFF:
1066 reason = xpcSystemPoweroff;
1067 break;
1068 default:
1069 reason = xpcSystemGoingDown;
1070 }
1071
1072 xpc_do_exit(reason);
1073 return NOTIFY_DONE;
1074}
1075
1076
1077/*
1078 * Notify other partitions to disengage from all references to our memory.
1024 */ 1079 */
1025static void 1080static void
1026xpc_die_disengage(void) 1081xpc_die_disengage(void)
@@ -1028,7 +1083,7 @@ xpc_die_disengage(void)
1028 struct xpc_partition *part; 1083 struct xpc_partition *part;
1029 partid_t partid; 1084 partid_t partid;
1030 unsigned long engaged; 1085 unsigned long engaged;
1031 long time, print_time, disengage_request_timeout; 1086 long time, printmsg_time, disengage_request_timeout;
1032 1087
1033 1088
1034 /* keep xpc_hb_checker thread from doing anything (just in case) */ 1089 /* keep xpc_hb_checker thread from doing anything (just in case) */
@@ -1055,57 +1110,53 @@ xpc_die_disengage(void)
1055 } 1110 }
1056 } 1111 }
1057 1112
1058 print_time = rtc_time(); 1113 time = rtc_time();
1059 disengage_request_timeout = print_time + 1114 printmsg_time = time +
1115 (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
1116 disengage_request_timeout = time +
1060 (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); 1117 (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
1061 1118
1062 /* wait for all other partitions to disengage from us */ 1119 /* wait for all other partitions to disengage from us */
1063 1120
1064 while ((engaged = xpc_partition_engaged(-1UL)) && 1121 while (1) {
1065 (time = rtc_time()) < disengage_request_timeout) { 1122 engaged = xpc_partition_engaged(-1UL);
1123 if (!engaged) {
1124 dev_info(xpc_part, "all partitions have disengaged\n");
1125 break;
1126 }
1066 1127
1067 if (time >= print_time) { 1128 time = rtc_time();
1129 if (time >= disengage_request_timeout) {
1130 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1131 if (engaged & (1UL << partid)) {
1132 dev_info(xpc_part, "disengage from "
1133 "remote partition %d timed "
1134 "out\n", partid);
1135 }
1136 }
1137 break;
1138 }
1139
1140 if (time >= printmsg_time) {
1068 dev_info(xpc_part, "waiting for remote partitions to " 1141 dev_info(xpc_part, "waiting for remote partitions to "
1069 "disengage, engaged=0x%lx\n", engaged); 1142 "disengage, timeout in %ld seconds\n",
1070 print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL * 1143 (disengage_request_timeout - time) /
1144 sn_rtc_cycles_per_second);
1145 printmsg_time = time +
1146 (XPC_DISENGAGE_PRINTMSG_INTERVAL *
1071 sn_rtc_cycles_per_second); 1147 sn_rtc_cycles_per_second);
1072 } 1148 }
1073 } 1149 }
1074 dev_info(xpc_part, "finished waiting for remote partitions to "
1075 "disengage, engaged=0x%lx\n", engaged);
1076}
1077
1078
1079/*
1080 * This function is called when the system is being rebooted.
1081 */
1082static int
1083xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
1084{
1085 enum xpc_retval reason;
1086
1087
1088 switch (event) {
1089 case SYS_RESTART:
1090 reason = xpcSystemReboot;
1091 break;
1092 case SYS_HALT:
1093 reason = xpcSystemHalt;
1094 break;
1095 case SYS_POWER_OFF:
1096 reason = xpcSystemPoweroff;
1097 break;
1098 default:
1099 reason = xpcSystemGoingDown;
1100 }
1101
1102 xpc_do_exit(reason);
1103 return NOTIFY_DONE;
1104} 1150}
1105 1151
1106 1152
1107/* 1153/*
1108 * This function is called when the system is being rebooted. 1154 * This function is called when the system is being restarted or halted due
1155 * to some sort of system failure. If this is the case we need to notify the
1156 * other partitions to disengage from all references to our memory.
1157 * This function can also be called when our heartbeater could be offlined
1158 * for a time. In this case we need to notify other partitions to not worry
1159 * about the lack of a heartbeat.
1109 */ 1160 */
1110static int 1161static int
1111xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) 1162xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
@@ -1115,11 +1166,25 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1115 case DIE_MACHINE_HALT: 1166 case DIE_MACHINE_HALT:
1116 xpc_die_disengage(); 1167 xpc_die_disengage();
1117 break; 1168 break;
1169
1170 case DIE_KDEBUG_ENTER:
1171 /* Should lack of heartbeat be ignored by other partitions? */
1172 if (!xpc_kdebug_ignore) {
1173 break;
1174 }
1175 /* fall through */
1118 case DIE_MCA_MONARCH_ENTER: 1176 case DIE_MCA_MONARCH_ENTER:
1119 case DIE_INIT_MONARCH_ENTER: 1177 case DIE_INIT_MONARCH_ENTER:
1120 xpc_vars->heartbeat++; 1178 xpc_vars->heartbeat++;
1121 xpc_vars->heartbeat_offline = 1; 1179 xpc_vars->heartbeat_offline = 1;
1122 break; 1180 break;
1181
1182 case DIE_KDEBUG_LEAVE:
1183 /* Is lack of heartbeat being ignored by other partitions? */
1184 if (!xpc_kdebug_ignore) {
1185 break;
1186 }
1187 /* fall through */
1123 case DIE_MCA_MONARCH_LEAVE: 1188 case DIE_MCA_MONARCH_LEAVE:
1124 case DIE_INIT_MONARCH_LEAVE: 1189 case DIE_INIT_MONARCH_LEAVE:
1125 xpc_vars->heartbeat++; 1190 xpc_vars->heartbeat++;
@@ -1344,3 +1409,7 @@ module_param(xpc_disengage_request_timelimit, int, 0);
1344MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait " 1409MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
1345 "for disengage request to complete."); 1410 "for disengage request to complete.");
1346 1411
1412module_param(xpc_kdebug_ignore, int, 0);
1413MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
1414 "other partitions when dropping into kdebug.");
1415
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index cdd6431853a1..88a730e6cfdb 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -3,7 +3,7 @@
3 * License. See the file "COPYING" in the main directory of this archive 3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details. 4 * for more details.
5 * 5 *
6 * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved. 6 * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
7 */ 7 */
8 8
9 9
@@ -28,7 +28,7 @@
28#include <asm/sn/sn_sal.h> 28#include <asm/sn/sn_sal.h>
29#include <asm/sn/nodepda.h> 29#include <asm/sn/nodepda.h>
30#include <asm/sn/addrs.h> 30#include <asm/sn/addrs.h>
31#include "xpc.h" 31#include <asm/sn/xpc.h>
32 32
33 33
34/* XPC is exiting flag */ 34/* XPC is exiting flag */
@@ -771,7 +771,8 @@ xpc_identify_act_IRQ_req(int nasid)
771 } 771 }
772 } 772 }
773 773
774 if (!xpc_partition_disengaged(part)) { 774 if (part->disengage_request_timeout > 0 &&
775 !xpc_partition_disengaged(part)) {
775 /* still waiting on other side to disengage from us */ 776 /* still waiting on other side to disengage from us */
776 return; 777 return;
777 } 778 }
@@ -873,6 +874,9 @@ xpc_partition_disengaged(struct xpc_partition *part)
873 * request in a timely fashion, so assume it's dead. 874 * request in a timely fashion, so assume it's dead.
874 */ 875 */
875 876
877 dev_info(xpc_part, "disengage from remote partition %d "
878 "timed out\n", partid);
879 xpc_disengage_request_timedout = 1;
876 xpc_clear_partition_engaged(1UL << partid); 880 xpc_clear_partition_engaged(1UL << partid);
877 disengaged = 1; 881 disengaged = 1;
878 } 882 }
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
index 34093476e965..e68332d93171 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -218,7 +218,9 @@ void sn_dma_flush(uint64_t addr)
218 uint64_t flags; 218 uint64_t flags;
219 uint64_t itte; 219 uint64_t itte;
220 struct hubdev_info *hubinfo; 220 struct hubdev_info *hubinfo;
221 volatile struct sn_flush_device_list *p; 221 volatile struct sn_flush_device_kernel *p;
222 volatile struct sn_flush_device_common *common;
223
222 struct sn_flush_nasid_entry *flush_nasid_list; 224 struct sn_flush_nasid_entry *flush_nasid_list;
223 225
224 if (!sn_ioif_inited) 226 if (!sn_ioif_inited)
@@ -268,17 +270,17 @@ void sn_dma_flush(uint64_t addr)
268 p = &flush_nasid_list->widget_p[wid_num][0]; 270 p = &flush_nasid_list->widget_p[wid_num][0];
269 271
270 /* find a matching BAR */ 272 /* find a matching BAR */
271 for (i = 0; i < DEV_PER_WIDGET; i++) { 273 for (i = 0; i < DEV_PER_WIDGET; i++,p++) {
274 common = p->common;
272 for (j = 0; j < PCI_ROM_RESOURCE; j++) { 275 for (j = 0; j < PCI_ROM_RESOURCE; j++) {
273 if (p->sfdl_bar_list[j].start == 0) 276 if (common->sfdl_bar_list[j].start == 0)
274 break; 277 break;
275 if (addr >= p->sfdl_bar_list[j].start 278 if (addr >= common->sfdl_bar_list[j].start
276 && addr <= p->sfdl_bar_list[j].end) 279 && addr <= common->sfdl_bar_list[j].end)
277 break; 280 break;
278 } 281 }
279 if (j < PCI_ROM_RESOURCE && p->sfdl_bar_list[j].start != 0) 282 if (j < PCI_ROM_RESOURCE && common->sfdl_bar_list[j].start != 0)
280 break; 283 break;
281 p++;
282 } 284 }
283 285
284 /* if no matching BAR, return without doing anything. */ 286 /* if no matching BAR, return without doing anything. */
@@ -304,24 +306,24 @@ void sn_dma_flush(uint64_t addr)
304 if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) { 306 if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) {
305 return; 307 return;
306 } else { 308 } else {
307 pcireg_wrb_flush_get(p->sfdl_pcibus_info, 309 pcireg_wrb_flush_get(common->sfdl_pcibus_info,
308 (p->sfdl_slot - 1)); 310 (common->sfdl_slot - 1));
309 } 311 }
310 } else { 312 } else {
311 spin_lock_irqsave(&((struct sn_flush_device_list *)p)-> 313 spin_lock_irqsave((spinlock_t *)&p->sfdl_flush_lock,
312 sfdl_flush_lock, flags); 314 flags);
313 315 *common->sfdl_flush_addr = 0;
314 *p->sfdl_flush_addr = 0;
315 316
316 /* force an interrupt. */ 317 /* force an interrupt. */
317 *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1; 318 *(volatile uint32_t *)(common->sfdl_force_int_addr) = 1;
318 319
319 /* wait for the interrupt to come back. */ 320 /* wait for the interrupt to come back. */
320 while (*(p->sfdl_flush_addr) != 0x10f) 321 while (*(common->sfdl_flush_addr) != 0x10f)
321 cpu_relax(); 322 cpu_relax();
322 323
323 /* okay, everything is synched up. */ 324 /* okay, everything is synched up. */
324 spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, flags); 325 spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock,
326 flags);
325 } 327 }
326 return; 328 return;
327} 329}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
index 1f500c81002c..e328e948175d 100644
--- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -92,7 +92,8 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
92 cnodeid_t near_cnode; 92 cnodeid_t near_cnode;
93 struct hubdev_info *hubdev_info; 93 struct hubdev_info *hubdev_info;
94 struct pcibus_info *soft; 94 struct pcibus_info *soft;
95 struct sn_flush_device_list *sn_flush_device_list; 95 struct sn_flush_device_kernel *sn_flush_device_kernel;
96 struct sn_flush_device_common *common;
96 97
97 if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) { 98 if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) {
98 return NULL; 99 return NULL;
@@ -137,20 +138,19 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
137 hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo); 138 hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
138 139
139 if (hubdev_info->hdi_flush_nasid_list.widget_p) { 140 if (hubdev_info->hdi_flush_nasid_list.widget_p) {
140 sn_flush_device_list = hubdev_info->hdi_flush_nasid_list. 141 sn_flush_device_kernel = hubdev_info->hdi_flush_nasid_list.
141 widget_p[(int)soft->pbi_buscommon.bs_xid]; 142 widget_p[(int)soft->pbi_buscommon.bs_xid];
142 if (sn_flush_device_list) { 143 if (sn_flush_device_kernel) {
143 for (j = 0; j < DEV_PER_WIDGET; 144 for (j = 0; j < DEV_PER_WIDGET;
144 j++, sn_flush_device_list++) { 145 j++, sn_flush_device_kernel++) {
145 if (sn_flush_device_list->sfdl_slot == -1) 146 common = sn_flush_device_kernel->common;
147 if (common->sfdl_slot == -1)
146 continue; 148 continue;
147 if ((sn_flush_device_list-> 149 if ((common->sfdl_persistent_segment ==
148 sfdl_persistent_segment ==
149 soft->pbi_buscommon.bs_persist_segment) && 150 soft->pbi_buscommon.bs_persist_segment) &&
150 (sn_flush_device_list-> 151 (common->sfdl_persistent_busnum ==
151 sfdl_persistent_busnum ==
152 soft->pbi_buscommon.bs_persist_busnum)) 152 soft->pbi_buscommon.bs_persist_busnum))
153 sn_flush_device_list->sfdl_pcibus_info = 153 common->sfdl_pcibus_info =
154 soft; 154 soft;
155 } 155 }
156 } 156 }