Diffstat (limited to 'arch/ia64')
 arch/ia64/Kconfig                    |  19
 arch/ia64/configs/tiger_defconfig    |   2
 arch/ia64/kernel/acpi.c              |  14
 arch/ia64/kernel/entry.S             |  14
 arch/ia64/kernel/iosapic.c           |   6
 arch/ia64/kernel/irq.c               |  13
 arch/ia64/kernel/mca.c               |  90
 arch/ia64/kernel/perfmon.c           |   5
 arch/ia64/kernel/signal.c            | 101
 arch/ia64/kernel/smpboot.c           | 114
 arch/ia64/kernel/time.c              |   9
 arch/ia64/kernel/topology.c          |   2
 arch/ia64/mm/contig.c                |   4
 arch/ia64/mm/discontig.c             |   9
 arch/ia64/sn/kernel/Makefile         |   3
 arch/ia64/sn/kernel/pio_phys.S       |  71
 arch/ia64/sn/kernel/setup.c          |   6
 arch/ia64/sn/kernel/sn2/sn2_smp.c    |  21
 arch/ia64/sn/kernel/xpc_channel.c    | 102
 arch/ia64/sn/kernel/xpc_main.c       |   1
 arch/ia64/sn/kernel/xpc_partition.c  |  28
 arch/ia64/sn/pci/tioce_provider.c    | 326
 22 files changed, 725 insertions(+), 235 deletions(-)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index a85ea9d37f05..ff7ae6b664e8 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -271,6 +271,25 @@ config SCHED_SMT
271 Intel IA64 chips with MultiThreading at a cost of slightly increased 271 Intel IA64 chips with MultiThreading at a cost of slightly increased
272 overhead in some places. If unsure say N here. 272 overhead in some places. If unsure say N here.
273 273
274config PERMIT_BSP_REMOVE
275 bool "Support removal of Bootstrap Processor"
276 depends on HOTPLUG_CPU
277 default n
278 ---help---
279	  Say Y here if your platform SAL supports removal of the BSP with
280	  HOTPLUG_CPU support.
281
282config FORCE_CPEI_RETARGET
283 bool "Force assumption that CPEI can be re-targetted"
284 depends on PERMIT_BSP_REMOVE
285 default n
286 ---help---
287	  Say Y if you need to force the assumption that CPEI can be re-targeted to
288	  any CPU in the system. This hint is available via the ACPI 3.0 specification.
289	  Tiger4 systems are capable of re-directing CPEI to any CPU other than the BSP.
290	  This option is useful for enabling this feature on older BIOSes as well.
291	  You can also enable it with the boot command line option force_cpei=1.
292
274config PREEMPT 293config PREEMPT
275 bool "Preemptible Kernel" 294 bool "Preemptible Kernel"
276 help 295 help
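[Editor's note, not part of the patch] For orientation while reading the rest of this series: CONFIG_PERMIT_BSP_REMOVE gates whether CPU 0 may be offlined at all, while CONFIG_FORCE_CPEI_RETARGET only picks the boot-time default of the force_cpei_retarget flag, which force_cpei= can still override. A minimal sketch of that wiring, mirroring the smpboot.c hunks further down in this patch:

	/* Sketch only -- summarizes the smpboot.c changes later in this patch. */
	#ifdef CONFIG_PERMIT_BSP_REMOVE
	#define bsp_remove_ok	1		/* CPU 0 may be offlined */
	#else
	#define bsp_remove_ok	0
	#endif

	#ifdef CONFIG_FORCE_CPEI_RETARGET
	#define CPEI_OVERRIDE_DEFAULT	(1)	/* assume CPEI can be re-targeted */
	#else
	#define CPEI_OVERRIDE_DEFAULT	(0)
	#endif

	unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;	/* force_cpei= overrides */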
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 125568118b84..766bf4955432 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -116,6 +116,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17
116CONFIG_SMP=y 116CONFIG_SMP=y
117CONFIG_NR_CPUS=4 117CONFIG_NR_CPUS=4
118CONFIG_HOTPLUG_CPU=y 118CONFIG_HOTPLUG_CPU=y
119CONFIG_PERMIT_BSP_REMOVE=y
120CONFIG_FORCE_CPEI_RETARGET=y
119# CONFIG_SCHED_SMT is not set 121# CONFIG_SCHED_SMT is not set
120# CONFIG_PREEMPT is not set 122# CONFIG_PREEMPT is not set
121CONFIG_SELECT_MEMORY_MODEL=y 123CONFIG_SELECT_MEMORY_MODEL=y
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index ecd44bdc8394..4722ec51c70c 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header,
284 return 0; 284 return 0;
285} 285}
286 286
287#ifdef CONFIG_HOTPLUG_CPU
287unsigned int can_cpei_retarget(void) 288unsigned int can_cpei_retarget(void)
288{ 289{
289 extern int cpe_vector; 290 extern int cpe_vector;
291 extern unsigned int force_cpei_retarget;
290 292
291 /* 293 /*
292 * Only if CPEI is supported and the override flag 294 * Only if CPEI is supported and the override flag
293 * is present, otherwise return that its re-targettable 295 * is present, otherwise return that its re-targettable
294 * if we are in polling mode. 296 * if we are in polling mode.
295 */ 297 */
296 if (cpe_vector > 0 && !acpi_cpei_override) 298 if (cpe_vector > 0) {
297 return 0; 299 if (acpi_cpei_override || force_cpei_retarget)
298 else 300 return 1;
299 return 1; 301 else
302 return 0;
303 }
304 return 1;
300} 305}
301 306
302unsigned int is_cpu_cpei_target(unsigned int cpu) 307unsigned int is_cpu_cpei_target(unsigned int cpu)
@@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu)
315{ 320{
316 acpi_cpei_phys_cpuid = cpu_physical_id(cpu); 321 acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
317} 322}
323#endif
318 324
319unsigned int get_cpei_target_cpu(void) 325unsigned int get_cpei_target_cpu(void)
320{ 326{
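[Editor's note, not part of the patch] The rewritten can_cpei_retarget() above is easier to read as a single expression; a sketch of the equivalent logic follows (the extern types are assumptions for illustration):

	/* Equivalent to the new can_cpei_retarget(): CPEI counts as
	 * re-targetable when no CPEI vector is in use (polling mode), or when
	 * either the ACPI override or the new force_cpei_retarget knob is set.
	 */
	extern int cpe_vector;
	extern unsigned int acpi_cpei_override, force_cpei_retarget;

	static unsigned int can_cpei_retarget_sketch(void)
	{
		return cpe_vector <= 0 || acpi_cpei_override || force_cpei_retarget;
	}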
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 930fdfca6ddb..0e3eda99e549 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1102,9 +1102,6 @@ skip_rbs_switch:
1102 st8 [r2]=r8 1102 st8 [r2]=r8
1103 st8 [r3]=r10 1103 st8 [r3]=r10
1104.work_pending: 1104.work_pending:
1105 tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
1106(p6) br.cond.sptk.few .sigdelayed
1107 ;;
1108 tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? 1105 tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
1109(p6) br.cond.sptk.few .notify 1106(p6) br.cond.sptk.few .notify
1110#ifdef CONFIG_PREEMPT 1107#ifdef CONFIG_PREEMPT
@@ -1131,17 +1128,6 @@ skip_rbs_switch:
1131(pLvSys)br.cond.sptk.few .work_pending_syscall_end 1128(pLvSys)br.cond.sptk.few .work_pending_syscall_end
1132 br.cond.sptk.many .work_processed_kernel // don't re-check 1129 br.cond.sptk.many .work_processed_kernel // don't re-check
1133 1130
1134// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
1135// it could not be delivered. Deliver it now. The signal might be for us and
1136// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
1137// signal.
1138
1139.sigdelayed:
1140 br.call.sptk.many rp=do_sigdelayed
1141 cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
1142(pLvSys)br.cond.sptk.few .work_pending_syscall_end
1143 br.cond.sptk.many .work_processed_kernel // re-check
1144
1145.work_pending_syscall_end: 1131.work_pending_syscall_end:
1146 adds r2=PT(R8)+16,r12 1132 adds r2=PT(R8)+16,r12
1147 adds r3=PT(R10)+16,r12 1133 adds r3=PT(R10)+16,r12
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 574084f343fa..8832c553230a 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int vector)
631{ 631{
632#ifdef CONFIG_SMP 632#ifdef CONFIG_SMP
633 static int cpu = -1; 633 static int cpu = -1;
634 extern int cpe_vector;
634 635
635 /* 636 /*
636 * In case of vector shared by multiple RTEs, all RTEs that 637 * In case of vector shared by multiple RTEs, all RTEs that
@@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int vector)
653 if (!cpu_online(smp_processor_id())) 654 if (!cpu_online(smp_processor_id()))
654 return cpu_physical_id(smp_processor_id()); 655 return cpu_physical_id(smp_processor_id());
655 656
657#ifdef CONFIG_ACPI
658 if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
659 return get_cpei_target_cpu();
660#endif
661
656#ifdef CONFIG_NUMA 662#ifdef CONFIG_NUMA
657 { 663 {
658 int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; 664 int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index d33244c32759..5ce908ef9c95 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -163,8 +163,19 @@ void fixup_irqs(void)
163{ 163{
164 unsigned int irq; 164 unsigned int irq;
165 extern void ia64_process_pending_intr(void); 165 extern void ia64_process_pending_intr(void);
166 extern void ia64_disable_timer(void);
167 extern volatile int time_keeper_id;
168
169 ia64_disable_timer();
170
171 /*
172 * Find a new timesync master
173 */
174 if (smp_processor_id() == time_keeper_id) {
175 time_keeper_id = first_cpu(cpu_online_map);
176 printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
177 }
166 178
167 ia64_set_itv(1<<16);
168 /* 179 /*
169 * Phase 1: Locate irq's bound to this cpu and 180 * Phase 1: Locate irq's bound to this cpu and
170 * relocate them for cpu removal. 181 * relocate them for cpu removal.
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index ee7eec9ee576..b57e723f194c 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -281,14 +281,10 @@ ia64_mca_log_sal_error_record(int sal_info_type)
281 ia64_sal_clear_state_info(sal_info_type); 281 ia64_sal_clear_state_info(sal_info_type);
282} 282}
283 283
284/*
285 * platform dependent error handling
286 */
287#ifndef PLATFORM_MCA_HANDLERS
288
289#ifdef CONFIG_ACPI 284#ifdef CONFIG_ACPI
290 285
291int cpe_vector = -1; 286int cpe_vector = -1;
287int ia64_cpe_irq = -1;
292 288
293static irqreturn_t 289static irqreturn_t
294ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) 290ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
@@ -377,8 +373,6 @@ ia64_mca_register_cpev (int cpev)
377} 373}
378#endif /* CONFIG_ACPI */ 374#endif /* CONFIG_ACPI */
379 375
380#endif /* PLATFORM_MCA_HANDLERS */
381
382/* 376/*
383 * ia64_mca_cmc_vector_setup 377 * ia64_mca_cmc_vector_setup
384 * 378 *
@@ -630,6 +624,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
630 *tnat |= (nat << tslot); 624 *tnat |= (nat << tslot);
631} 625}
632 626
627/* Change the comm field on the MCA/INT task to include the pid that
628 * was interrupted, it makes for easier debugging. If that pid was 0
629 * (swapper or nested MCA/INIT) then use the start of the previous comm
630 * field suffixed with its cpu.
631 */
632
633static void
634ia64_mca_modify_comm(const task_t *previous_current)
635{
636 char *p, comm[sizeof(current->comm)];
637 if (previous_current->pid)
638 snprintf(comm, sizeof(comm), "%s %d",
639 current->comm, previous_current->pid);
640 else {
641 int l;
642 if ((p = strchr(previous_current->comm, ' ')))
643 l = p - previous_current->comm;
644 else
645 l = strlen(previous_current->comm);
646 snprintf(comm, sizeof(comm), "%s %*s %d",
647 current->comm, l, previous_current->comm,
648 task_thread_info(previous_current)->cpu);
649 }
650 memcpy(current->comm, comm, sizeof(current->comm));
651}
652
633/* On entry to this routine, we are running on the per cpu stack, see 653/* On entry to this routine, we are running on the per cpu stack, see
634 * mca_asm.h. The original stack has not been touched by this event. Some of 654 * mca_asm.h. The original stack has not been touched by this event. Some of
635 * the original stack's registers will be in the RBS on this stack. This stack 655 * the original stack's registers will be in the RBS on this stack. This stack
@@ -648,7 +668,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
648 struct ia64_sal_os_state *sos, 668 struct ia64_sal_os_state *sos,
649 const char *type) 669 const char *type)
650{ 670{
651 char *p, comm[sizeof(current->comm)]; 671 char *p;
652 ia64_va va; 672 ia64_va va;
653 extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ 673 extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */
654 const pal_min_state_area_t *ms = sos->pal_min_state; 674 const pal_min_state_area_t *ms = sos->pal_min_state;
@@ -721,6 +741,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
721 /* Verify the previous stack state before we change it */ 741 /* Verify the previous stack state before we change it */
722 if (user_mode(regs)) { 742 if (user_mode(regs)) {
723 msg = "occurred in user space"; 743 msg = "occurred in user space";
744 /* previous_current is guaranteed to be valid when the task was
745 * in user space, so ...
746 */
747 ia64_mca_modify_comm(previous_current);
724 goto no_mod; 748 goto no_mod;
725 } 749 }
726 if (r13 != sos->prev_IA64_KR_CURRENT) { 750 if (r13 != sos->prev_IA64_KR_CURRENT) {
@@ -750,25 +774,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
750 goto no_mod; 774 goto no_mod;
751 } 775 }
752 776
753 /* Change the comm field on the MCA/INT task to include the pid that 777 ia64_mca_modify_comm(previous_current);
754 * was interrupted, it makes for easier debugging. If that pid was 0
755 * (swapper or nested MCA/INIT) then use the start of the previous comm
756 * field suffixed with its cpu.
757 */
758 if (previous_current->pid)
759 snprintf(comm, sizeof(comm), "%s %d",
760 current->comm, previous_current->pid);
761 else {
762 int l;
763 if ((p = strchr(previous_current->comm, ' ')))
764 l = p - previous_current->comm;
765 else
766 l = strlen(previous_current->comm);
767 snprintf(comm, sizeof(comm), "%s %*s %d",
768 current->comm, l, previous_current->comm,
769 task_thread_info(previous_current)->cpu);
770 }
771 memcpy(current->comm, comm, sizeof(current->comm));
772 778
773 /* Make the original task look blocked. First stack a struct pt_regs, 779 /* Make the original task look blocked. First stack a struct pt_regs,
774 * describing the state at the time of interrupt. mca_asm.S built a 780 * describing the state at the time of interrupt. mca_asm.S built a
@@ -908,7 +914,7 @@ no_mod:
908static void 914static void
909ia64_wait_for_slaves(int monarch) 915ia64_wait_for_slaves(int monarch)
910{ 916{
911 int c, wait = 0; 917 int c, wait = 0, missing = 0;
912 for_each_online_cpu(c) { 918 for_each_online_cpu(c) {
913 if (c == monarch) 919 if (c == monarch)
914 continue; 920 continue;
@@ -919,15 +925,32 @@ ia64_wait_for_slaves(int monarch)
919 } 925 }
920 } 926 }
921 if (!wait) 927 if (!wait)
922 return; 928 goto all_in;
923 for_each_online_cpu(c) { 929 for_each_online_cpu(c) {
924 if (c == monarch) 930 if (c == monarch)
925 continue; 931 continue;
926 if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { 932 if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
927 udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ 933 udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */
934 if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
935 missing = 1;
928 break; 936 break;
929 } 937 }
930 } 938 }
939 if (!missing)
940 goto all_in;
941 printk(KERN_INFO "OS MCA slave did not rendezvous on cpu");
942 for_each_online_cpu(c) {
943 if (c == monarch)
944 continue;
945 if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
946 printk(" %d", c);
947 }
948 printk("\n");
949 return;
950
951all_in:
952 printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n");
953 return;
931} 954}
932 955
933/* 956/*
@@ -953,6 +976,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
953 task_t *previous_current; 976 task_t *previous_current;
954 977
955 oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ 978 oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
979 console_loglevel = 15; /* make sure printks make it to console */
980 printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
981 sos->proc_state_param, cpu, sos->monarch);
982
956 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); 983 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
957 monarch_cpu = cpu; 984 monarch_cpu = cpu;
958 if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) 985 if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
@@ -1444,11 +1471,13 @@ void __devinit
1444ia64_mca_cpu_init(void *cpu_data) 1471ia64_mca_cpu_init(void *cpu_data)
1445{ 1472{
1446 void *pal_vaddr; 1473 void *pal_vaddr;
1474 static int first_time = 1;
1447 1475
1448 if (smp_processor_id() == 0) { 1476 if (first_time) {
1449 void *mca_data; 1477 void *mca_data;
1450 int cpu; 1478 int cpu;
1451 1479
1480 first_time = 0;
1452 mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) 1481 mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
1453 * NR_CPUS + KERNEL_STACK_SIZE); 1482 * NR_CPUS + KERNEL_STACK_SIZE);
1454 mca_data = (void *)(((unsigned long)mca_data + 1483 mca_data = (void *)(((unsigned long)mca_data +
@@ -1704,6 +1733,7 @@ ia64_mca_late_init(void)
1704 desc = irq_descp(irq); 1733 desc = irq_descp(irq);
1705 desc->status |= IRQ_PER_CPU; 1734 desc->status |= IRQ_PER_CPU;
1706 setup_irq(irq, &mca_cpe_irqaction); 1735 setup_irq(irq, &mca_cpe_irqaction);
1736 ia64_cpe_irq = irq;
1707 } 1737 }
1708 ia64_mca_register_cpev(cpe_vector); 1738 ia64_mca_register_cpev(cpe_vector);
1709 IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); 1739 IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9c5194b385da..077f21216b65 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6722,6 +6722,7 @@ __initcall(pfm_init);
6722void 6722void
6723pfm_init_percpu (void) 6723pfm_init_percpu (void)
6724{ 6724{
6725 static int first_time=1;
6725 /* 6726 /*
6726 * make sure no measurement is active 6727 * make sure no measurement is active
6727 * (may inherit programmed PMCs from EFI). 6728 * (may inherit programmed PMCs from EFI).
@@ -6734,8 +6735,10 @@ pfm_init_percpu (void)
6734 */ 6735 */
6735 pfm_unfreeze_pmu(); 6736 pfm_unfreeze_pmu();
6736 6737
6737 if (smp_processor_id() == 0) 6738 if (first_time) {
6738 register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); 6739 register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
6740 first_time=0;
6741 }
6739 6742
6740 ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); 6743 ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
6741 ia64_srlz_d(); 6744 ia64_srlz_d();
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 463f6bb44d07..1d7903ee2126 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
588 } 588 }
589 return 0; 589 return 0;
590} 590}
591
592/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
593 * could not be delivered. It is important that the target process is not
594 * allowed to do any more work in user space. Possible cases for the target
595 * process:
596 *
597 * - It is sleeping and will wake up soon. Store the data in the current task,
598 * the signal will be sent when the current task returns from the next
599 * interrupt.
600 *
601 * - It is running in user context. Store the data in the current task, the
602 * signal will be sent when the current task returns from the next interrupt.
603 *
604 * - It is running in kernel context on this or another cpu and will return to
605 * user context. Store the data in the target task, the signal will be sent
606 * to itself when the target task returns to user space.
607 *
608 * - It is running in kernel context on this cpu and will sleep before
609 * returning to user context. Because this is also the current task, the
610 * signal will not get delivered and the task could sleep indefinitely.
611 * Store the data in the idle task for this cpu, the signal will be sent
612 * after the idle task processes its next interrupt.
613 *
614 * To cover all cases, store the data in the target task, the current task and
615 * the idle task on this cpu. Whatever happens, the signal will be delivered
616 * to the target task before it can do any useful user space work. Multiple
617 * deliveries have no unwanted side effects.
618 *
619 * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
620 * disabled. It must not take any locks nor use kernel structures or services
621 * that require locks.
622 */
623
624/* To ensure that we get the right pid, check its start time. To avoid extra
625 * include files in thread_info.h, convert the task start_time to unsigned long,
626 * giving us a cycle time of > 580 years.
627 */
628static inline unsigned long
629start_time_ul(const struct task_struct *t)
630{
631 return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
632}
633
634void
635set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
636{
637 struct task_struct *t;
638 unsigned long start_time = 0;
639 int i;
640
641 for (i = 1; i <= 3; ++i) {
642 switch (i) {
643 case 1:
644 t = find_task_by_pid(pid);
645 if (t)
646 start_time = start_time_ul(t);
647 break;
648 case 2:
649 t = current;
650 break;
651 default:
652 t = idle_task(smp_processor_id());
653 break;
654 }
655
656 if (!t)
657 return;
658 task_thread_info(t)->sigdelayed.signo = signo;
659 task_thread_info(t)->sigdelayed.code = code;
660 task_thread_info(t)->sigdelayed.addr = addr;
661 task_thread_info(t)->sigdelayed.start_time = start_time;
662 task_thread_info(t)->sigdelayed.pid = pid;
663 wmb();
664 set_tsk_thread_flag(t, TIF_SIGDELAYED);
665 }
666}
667
668/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
669 * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
670 */
671
672void
673do_sigdelayed(void)
674{
675 struct siginfo siginfo;
676 pid_t pid;
677 struct task_struct *t;
678
679 clear_thread_flag(TIF_SIGDELAYED);
680 memset(&siginfo, 0, sizeof(siginfo));
681 siginfo.si_signo = current_thread_info()->sigdelayed.signo;
682 siginfo.si_code = current_thread_info()->sigdelayed.code;
683 siginfo.si_addr = current_thread_info()->sigdelayed.addr;
684 pid = current_thread_info()->sigdelayed.pid;
685 t = find_task_by_pid(pid);
686 if (!t)
687 return;
688 if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
689 return;
690 force_sig_info(siginfo.si_signo, &siginfo, t);
691}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index b681ef34a86e..c4b633b36dab 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -70,6 +70,12 @@
70#endif 70#endif
71 71
72#ifdef CONFIG_HOTPLUG_CPU 72#ifdef CONFIG_HOTPLUG_CPU
73#ifdef CONFIG_PERMIT_BSP_REMOVE
74#define bsp_remove_ok 1
75#else
76#define bsp_remove_ok 0
77#endif
78
73/* 79/*
74 * Store all idle threads, this can be reused instead of creating 80 * Store all idle threads, this can be reused instead of creating
75 * a new thread. Also avoids complicated thread destroy functionality 81 * a new thread. Also avoids complicated thread destroy functionality
@@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
104/* 110/*
105 * ITC synchronization related stuff: 111 * ITC synchronization related stuff:
106 */ 112 */
107#define MASTER 0 113#define MASTER (0)
108#define SLAVE (SMP_CACHE_BYTES/8) 114#define SLAVE (SMP_CACHE_BYTES/8)
109 115
110#define NUM_ROUNDS 64 /* magic value */ 116#define NUM_ROUNDS 64 /* magic value */
@@ -151,6 +157,27 @@ char __initdata no_int_routing;
151 157
152unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ 158unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
153 159
160#ifdef CONFIG_FORCE_CPEI_RETARGET
161#define CPEI_OVERRIDE_DEFAULT (1)
162#else
163#define CPEI_OVERRIDE_DEFAULT (0)
164#endif
165
166unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
167
168static int __init
169cmdl_force_cpei(char *str)
170{
171 int value=0;
172
173 get_option (&str, &value);
174 force_cpei_retarget = value;
175
176 return 1;
177}
178
179__setup("force_cpei=", cmdl_force_cpei);
180
154static int __init 181static int __init
155nointroute (char *str) 182nointroute (char *str)
156{ 183{
@@ -161,6 +188,27 @@ nointroute (char *str)
161 188
162__setup("nointroute", nointroute); 189__setup("nointroute", nointroute);
163 190
191static void fix_b0_for_bsp(void)
192{
193#ifdef CONFIG_HOTPLUG_CPU
194 int cpuid;
195 static int fix_bsp_b0 = 1;
196
197 cpuid = smp_processor_id();
198
199 /*
200 * Cache the b0 value on the first AP that comes up
201 */
202 if (!(fix_bsp_b0 && cpuid))
203 return;
204
205 sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0];
206 printk ("Fixed BSP b0 value from CPU %d\n", cpuid);
207
208 fix_bsp_b0 = 0;
209#endif
210}
211
164void 212void
165sync_master (void *arg) 213sync_master (void *arg)
166{ 214{
@@ -327,8 +375,9 @@ smp_setup_percpu_timer (void)
327static void __devinit 375static void __devinit
328smp_callin (void) 376smp_callin (void)
329{ 377{
330 int cpuid, phys_id; 378 int cpuid, phys_id, itc_master;
331 extern void ia64_init_itm(void); 379 extern void ia64_init_itm(void);
380 extern volatile int time_keeper_id;
332 381
333#ifdef CONFIG_PERFMON 382#ifdef CONFIG_PERFMON
334 extern void pfm_init_percpu(void); 383 extern void pfm_init_percpu(void);
@@ -336,6 +385,7 @@ smp_callin (void)
336 385
337 cpuid = smp_processor_id(); 386 cpuid = smp_processor_id();
338 phys_id = hard_smp_processor_id(); 387 phys_id = hard_smp_processor_id();
388 itc_master = time_keeper_id;
339 389
340 if (cpu_online(cpuid)) { 390 if (cpu_online(cpuid)) {
341 printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", 391 printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
@@ -343,6 +393,8 @@ smp_callin (void)
343 BUG(); 393 BUG();
344 } 394 }
345 395
396 fix_b0_for_bsp();
397
346 lock_ipi_calllock(); 398 lock_ipi_calllock();
347 cpu_set(cpuid, cpu_online_map); 399 cpu_set(cpuid, cpu_online_map);
348 unlock_ipi_calllock(); 400 unlock_ipi_calllock();
@@ -365,8 +417,8 @@ smp_callin (void)
365 * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls 417 * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls
366 * local_bh_enable(), which bugs out if irqs are not enabled... 418 * local_bh_enable(), which bugs out if irqs are not enabled...
367 */ 419 */
368 Dprintk("Going to syncup ITC with BP.\n"); 420 Dprintk("Going to syncup ITC with ITC Master.\n");
369 ia64_sync_itc(0); 421 ia64_sync_itc(itc_master);
370 } 422 }
371 423
372 /* 424 /*
@@ -635,6 +687,47 @@ remove_siblinginfo(int cpu)
635} 687}
636 688
637extern void fixup_irqs(void); 689extern void fixup_irqs(void);
690
691int migrate_platform_irqs(unsigned int cpu)
692{
693 int new_cpei_cpu;
694 irq_desc_t *desc = NULL;
695 cpumask_t mask;
696 int retval = 0;
697
698 /*
699	 * don't permit the CPEI target CPU to be removed.
700 */
701 if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
702 printk ("CPU (%d) is CPEI Target\n", cpu);
703 if (can_cpei_retarget()) {
704 /*
705 * Now re-target the CPEI to a different processor
706 */
707 new_cpei_cpu = any_online_cpu(cpu_online_map);
708 mask = cpumask_of_cpu(new_cpei_cpu);
709 set_cpei_target_cpu(new_cpei_cpu);
710 desc = irq_descp(ia64_cpe_irq);
711 /*
712	 * Switch immediately for now; we should fake the interrupt as is
713	 * done for other interrupts, but we need to study CPEI behaviour
714	 * with polling before making that change.
715 */
716 if (desc) {
717 desc->handler->disable(ia64_cpe_irq);
718 desc->handler->set_affinity(ia64_cpe_irq, mask);
719 desc->handler->enable(ia64_cpe_irq);
720 printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu);
721 }
722 }
723 if (!desc) {
724 printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
725 retval = -EBUSY;
726 }
727 }
728 return retval;
729}
730
638/* must be called with cpucontrol mutex held */ 731/* must be called with cpucontrol mutex held */
639int __cpu_disable(void) 732int __cpu_disable(void)
640{ 733{
@@ -643,8 +736,17 @@ int __cpu_disable(void)
643 /* 736 /*
644 * dont permit boot processor for now 737 * dont permit boot processor for now
645 */ 738 */
646 if (cpu == 0) 739 if (cpu == 0 && !bsp_remove_ok) {
647 return -EBUSY; 740 printk ("Your platform does not support removal of BSP\n");
741 return (-EBUSY);
742 }
743
744 cpu_clear(cpu, cpu_online_map);
745
746 if (migrate_platform_irqs(cpu)) {
747 cpu_set(cpu, cpu_online_map);
748 return (-EBUSY);
749 }
648 750
649 remove_siblinginfo(cpu); 751 remove_siblinginfo(cpu);
650 cpu_clear(cpu, cpu_online_map); 752 cpu_clear(cpu, cpu_online_map);
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 307d01e15b2e..ac167436e936 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -32,7 +32,7 @@
32 32
33extern unsigned long wall_jiffies; 33extern unsigned long wall_jiffies;
34 34
35#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ 35volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
36 36
37#ifdef CONFIG_IA64_DEBUG_IRQ 37#ifdef CONFIG_IA64_DEBUG_IRQ
38 38
@@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
71 71
72 new_itm += local_cpu_data->itm_delta; 72 new_itm += local_cpu_data->itm_delta;
73 73
74 if (smp_processor_id() == TIME_KEEPER_ID) { 74 if (smp_processor_id() == time_keeper_id) {
75 /* 75 /*
76 * Here we are in the timer irq handler. We have irqs locally 76 * Here we are in the timer irq handler. We have irqs locally
77 * disabled, but we don't know if the timer_bh is running on 77 * disabled, but we don't know if the timer_bh is running on
@@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = {
236 .name = "timer" 236 .name = "timer"
237}; 237};
238 238
239void __devinit ia64_disable_timer(void)
240{
241 ia64_set_itv(1 << 16);
242}
243
239void __init 244void __init
240time_init (void) 245time_init (void)
241{ 246{
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 6e5eea19fa67..3b6fd798c4d6 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,7 +36,7 @@ int arch_register_cpu(int num)
36 parent = &sysfs_nodes[cpu_to_node(num)]; 36 parent = &sysfs_nodes[cpu_to_node(num)];
37#endif /* CONFIG_NUMA */ 37#endif /* CONFIG_NUMA */
38 38
39#ifdef CONFIG_ACPI 39#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
40 /* 40 /*
41 * If CPEI cannot be re-targetted, and this is 41 * If CPEI cannot be re-targetted, and this is
42 * CPEI target, then dont create the control file 42 * CPEI target, then dont create the control file
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index acaaec4e4681..9855ba318094 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -181,13 +181,15 @@ per_cpu_init (void)
181{ 181{
182 void *cpu_data; 182 void *cpu_data;
183 int cpu; 183 int cpu;
184 static int first_time=1;
184 185
185 /* 186 /*
186 * get_free_pages() cannot be used before cpu_init() done. BSP 187 * get_free_pages() cannot be used before cpu_init() done. BSP
187 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls 188 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
188 * get_zeroed_page(). 189 * get_zeroed_page().
189 */ 190 */
190 if (smp_processor_id() == 0) { 191 if (first_time) {
192 first_time=0;
191 cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS, 193 cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
192 PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); 194 PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
193 for (cpu = 0; cpu < NR_CPUS; cpu++) { 195 for (cpu = 0; cpu < NR_CPUS; cpu++) {
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c87d6d1d5813..573d5cc63e2b 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -528,12 +528,17 @@ void __init find_memory(void)
528void *per_cpu_init(void) 528void *per_cpu_init(void)
529{ 529{
530 int cpu; 530 int cpu;
531 static int first_time = 1;
532
531 533
532 if (smp_processor_id() != 0) 534 if (smp_processor_id() != 0)
533 return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; 535 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
534 536
535 for (cpu = 0; cpu < NR_CPUS; cpu++) 537 if (first_time) {
536 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; 538 first_time = 0;
539 for (cpu = 0; cpu < NR_CPUS; cpu++)
540 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
541 }
537 542
538 return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; 543 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
539} 544}
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 3e9b4eea7418..ab9c48c88012 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -10,7 +10,8 @@
10CPPFLAGS += -I$(srctree)/arch/ia64/sn/include 10CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
11 11
12obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \ 12obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
13 huberror.o io_init.o iomv.o klconflib.o sn2/ 13 huberror.o io_init.o iomv.o klconflib.o pio_phys.o \
14 sn2/
14obj-$(CONFIG_IA64_GENERIC) += machvec.o 15obj-$(CONFIG_IA64_GENERIC) += machvec.o
15obj-$(CONFIG_SGI_TIOCX) += tiocx.o 16obj-$(CONFIG_SGI_TIOCX) += tiocx.o
16obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o 17obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o
diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S
new file mode 100644
index 000000000000..3c7d48d6ecb8
--- /dev/null
+++ b/arch/ia64/sn/kernel/pio_phys.S
@@ -0,0 +1,71 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
7 *
8 * This file contains routines used to access MMR registers via
9 * uncached physical addresses.
10 * pio_phys_read_mmr - read an MMR
11 * pio_phys_write_mmr - write an MMR
12 * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
13 * Second MMR will be skipped if address is NULL
14 *
15 * Addresses passed to these routines should be uncached physical addresses
16 * ie., 0x80000....
17 */
18
19
20
21#include <asm/asmmacro.h>
22#include <asm/page.h>
23
24GLOBAL_ENTRY(pio_phys_read_mmr)
25 .prologue
26 .regstk 1,0,0,0
27 .body
28 mov r2=psr
29 rsm psr.i | psr.dt
30 ;;
31 srlz.d
32 ld8.acq r8=[r32]
33 ;;
34 mov psr.l=r2;;
35 srlz.d
36 br.ret.sptk.many rp
37END(pio_phys_read_mmr)
38
39GLOBAL_ENTRY(pio_phys_write_mmr)
40 .prologue
41 .regstk 2,0,0,0
42 .body
43 mov r2=psr
44 rsm psr.i | psr.dt
45 ;;
46 srlz.d
47 st8.rel [r32]=r33
48 ;;
49 mov psr.l=r2;;
50 srlz.d
51 br.ret.sptk.many rp
52END(pio_phys_write_mmr)
53
54GLOBAL_ENTRY(pio_atomic_phys_write_mmrs)
55 .prologue
56 .regstk 4,0,0,0
57 .body
58 mov r2=psr
59 cmp.ne p9,p0=r34,r0;
60 rsm psr.i | psr.dt | psr.ic
61 ;;
62 srlz.d
63 st8.rel [r32]=r33
64(p9) st8.rel [r34]=r35
65 ;;
66 mov psr.l=r2;;
67 srlz.d
68 br.ret.sptk.many rp
69END(pio_atomic_phys_write_mmrs)
70
71
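[Editor's note, not part of the patch] Callers reach these entry points through C prototypes; the declarations below are assumptions shown only for illustration (the authoritative ones live in the SN headers, which this diff does not touch). As the file comment says, the addresses passed must be uncached physical addresses (0x8000...-based):

	/* Assumed prototypes, for illustration only. */
	extern long pio_phys_read_mmr(volatile long *mmr);
	extern void pio_phys_write_mmr(volatile long *mmr, long val);
	extern void pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1,
					       volatile long *mmr2, long val2);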
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 5b84836c2171..8b6d5c844708 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -3,7 +3,7 @@
3 * License. See the file "COPYING" in the main directory of this archive 3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details. 4 * for more details.
5 * 5 *
6 * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved. 6 * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
7 */ 7 */
8 8
9#include <linux/config.h> 9#include <linux/config.h>
@@ -498,6 +498,7 @@ void __init sn_setup(char **cmdline_p)
498 * for sn. 498 * for sn.
499 */ 499 */
500 pm_power_off = ia64_sn_power_down; 500 pm_power_off = ia64_sn_power_down;
501 current->thread.flags |= IA64_THREAD_MIGRATION;
501} 502}
502 503
503/** 504/**
@@ -660,7 +661,8 @@ void __init sn_cpu_init(void)
660 SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; 661 SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
661 u64 *pio; 662 u64 *pio;
662 pio = is_shub1() ? pio1 : pio2; 663 pio = is_shub1() ? pio1 : pio2;
663 pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); 664 pda->pio_write_status_addr =
665 (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
664 pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0; 666 pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
665 } 667 }
666 668
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index b2e1e746b47f..d9d306c79f2d 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void)
93 return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0; 93 return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
94} 94}
95 95
96/**
97 * sn_migrate - SN-specific task migration actions
98 * @task: Task being migrated to new CPU
99 *
100 * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
101 * Context switching user threads which have memory-mapped MMIO may cause
102 * PIOs to issue from separate CPUs, thus the PIO writes must be drained
103 * from the previous CPU's Shub before execution resumes on the new CPU.
104 */
105void sn_migrate(struct task_struct *task)
106{
107 pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
108 volatile unsigned long *adr = last_pda->pio_write_status_addr;
109 unsigned long val = last_pda->pio_write_status_val;
110
111 /* Drain PIO writes from old CPU's Shub */
112 while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
113 != val))
114 cpu_relax();
115}
116
96void sn_tlb_migrate_finish(struct mm_struct *mm) 117void sn_tlb_migrate_finish(struct mm_struct *mm)
97{ 118{
98 /* flush_tlb_mm is inefficient if more than 1 users of mm */ 119 /* flush_tlb_mm is inefficient if more than 1 users of mm */
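[Editor's note, not part of the patch] sn_migrate() pairs with the IA64_THREAD_MIGRATION flag set in sn_setup() above: the context-switch path is expected to invoke the platform migrate hook when a flagged task resumes on a different CPU. A hedged sketch of that call site (the real wiring is in the ia64 switch_to/machvec code, outside this diff; the names below are assumptions):

	/* Sketch of the assumed call site, not part of this patch. */
	if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) &&
		     task_thread_info(current)->last_cpu != smp_processor_id())) {
		platform_migrate(current);	/* resolves to sn_migrate() on SN2 */
		task_thread_info(current)->last_cpu = smp_processor_id();
	}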
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index cdf6856ce089..d0abddd9ffe6 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -21,7 +21,6 @@
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/cache.h> 22#include <linux/cache.h>
23#include <linux/interrupt.h> 23#include <linux/interrupt.h>
24#include <linux/slab.h>
25#include <linux/mutex.h> 24#include <linux/mutex.h>
26#include <linux/completion.h> 25#include <linux/completion.h>
27#include <asm/sn/bte.h> 26#include <asm/sn/bte.h>
@@ -30,6 +29,31 @@
30 29
31 30
32/* 31/*
32 * Guarantee that the kzalloc'd memory is cacheline aligned.
33 */
34static void *
35xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
36{
37	/* see if kzalloc will give us cacheline aligned memory by default */
38 *base = kzalloc(size, flags);
39 if (*base == NULL) {
40 return NULL;
41 }
42 if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
43 return *base;
44 }
45 kfree(*base);
46
47 /* nope, we'll have to do it ourselves */
48 *base = kzalloc(size + L1_CACHE_BYTES, flags);
49 if (*base == NULL) {
50 return NULL;
51 }
52 return (void *) L1_CACHE_ALIGN((u64) *base);
53}
54
55
56/*
33 * Set up the initial values for the XPartition Communication channels. 57 * Set up the initial values for the XPartition Communication channels.
34 */ 58 */
35static void 59static void
@@ -93,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part)
93 * Allocate all of the channel structures as a contiguous chunk of 117 * Allocate all of the channel structures as a contiguous chunk of
94 * memory. 118 * memory.
95 */ 119 */
96 part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS, 120 part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
97 GFP_KERNEL); 121 GFP_KERNEL);
98 if (part->channels == NULL) { 122 if (part->channels == NULL) {
99 dev_err(xpc_chan, "can't get memory for channels\n"); 123 dev_err(xpc_chan, "can't get memory for channels\n");
100 return xpcNoMemory; 124 return xpcNoMemory;
101 } 125 }
102 memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS);
103 126
104 part->nchannels = XPC_NCHANNELS; 127 part->nchannels = XPC_NCHANNELS;
105 128
106 129
107 /* allocate all the required GET/PUT values */ 130 /* allocate all the required GET/PUT values */
108 131
109 part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, 132 part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
110 GFP_KERNEL, &part->local_GPs_base); 133 GFP_KERNEL, &part->local_GPs_base);
111 if (part->local_GPs == NULL) { 134 if (part->local_GPs == NULL) {
112 kfree(part->channels); 135 kfree(part->channels);
@@ -115,55 +138,51 @@ xpc_setup_infrastructure(struct xpc_partition *part)
115 "values\n"); 138 "values\n");
116 return xpcNoMemory; 139 return xpcNoMemory;
117 } 140 }
118 memset(part->local_GPs, 0, XPC_GP_SIZE);
119 141
120 part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE, 142 part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
121 GFP_KERNEL, &part->remote_GPs_base); 143 GFP_KERNEL, &part->remote_GPs_base);
122 if (part->remote_GPs == NULL) { 144 if (part->remote_GPs == NULL) {
123 kfree(part->channels);
124 part->channels = NULL;
125 kfree(part->local_GPs_base);
126 part->local_GPs = NULL;
127 dev_err(xpc_chan, "can't get memory for remote get/put " 145 dev_err(xpc_chan, "can't get memory for remote get/put "
128 "values\n"); 146 "values\n");
147 kfree(part->local_GPs_base);
148 part->local_GPs = NULL;
149 kfree(part->channels);
150 part->channels = NULL;
129 return xpcNoMemory; 151 return xpcNoMemory;
130 } 152 }
131 memset(part->remote_GPs, 0, XPC_GP_SIZE);
132 153
133 154
134 /* allocate all the required open and close args */ 155 /* allocate all the required open and close args */
135 156
136 part->local_openclose_args = xpc_kmalloc_cacheline_aligned( 157 part->local_openclose_args = xpc_kzalloc_cacheline_aligned(
137 XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, 158 XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
138 &part->local_openclose_args_base); 159 &part->local_openclose_args_base);
139 if (part->local_openclose_args == NULL) { 160 if (part->local_openclose_args == NULL) {
140 kfree(part->channels); 161 dev_err(xpc_chan, "can't get memory for local connect args\n");
141 part->channels = NULL;
142 kfree(part->local_GPs_base);
143 part->local_GPs = NULL;
144 kfree(part->remote_GPs_base); 162 kfree(part->remote_GPs_base);
145 part->remote_GPs = NULL; 163 part->remote_GPs = NULL;
146 dev_err(xpc_chan, "can't get memory for local connect args\n"); 164 kfree(part->local_GPs_base);
165 part->local_GPs = NULL;
166 kfree(part->channels);
167 part->channels = NULL;
147 return xpcNoMemory; 168 return xpcNoMemory;
148 } 169 }
149 memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
150 170
151 part->remote_openclose_args = xpc_kmalloc_cacheline_aligned( 171 part->remote_openclose_args = xpc_kzalloc_cacheline_aligned(
152 XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL, 172 XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
153 &part->remote_openclose_args_base); 173 &part->remote_openclose_args_base);
154 if (part->remote_openclose_args == NULL) { 174 if (part->remote_openclose_args == NULL) {
155 kfree(part->channels); 175 dev_err(xpc_chan, "can't get memory for remote connect args\n");
156 part->channels = NULL;
157 kfree(part->local_GPs_base);
158 part->local_GPs = NULL;
159 kfree(part->remote_GPs_base);
160 part->remote_GPs = NULL;
161 kfree(part->local_openclose_args_base); 176 kfree(part->local_openclose_args_base);
162 part->local_openclose_args = NULL; 177 part->local_openclose_args = NULL;
163 dev_err(xpc_chan, "can't get memory for remote connect args\n"); 178 kfree(part->remote_GPs_base);
179 part->remote_GPs = NULL;
180 kfree(part->local_GPs_base);
181 part->local_GPs = NULL;
182 kfree(part->channels);
183 part->channels = NULL;
164 return xpcNoMemory; 184 return xpcNoMemory;
165 } 185 }
166 memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
167 186
168 187
169 xpc_initialize_channels(part, partid); 188 xpc_initialize_channels(part, partid);
@@ -186,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part)
186 ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ, 205 ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ,
187 part->IPI_owner, (void *) (u64) partid); 206 part->IPI_owner, (void *) (u64) partid);
188 if (ret != 0) { 207 if (ret != 0) {
189 kfree(part->channels);
190 part->channels = NULL;
191 kfree(part->local_GPs_base);
192 part->local_GPs = NULL;
193 kfree(part->remote_GPs_base);
194 part->remote_GPs = NULL;
195 kfree(part->local_openclose_args_base);
196 part->local_openclose_args = NULL;
197 kfree(part->remote_openclose_args_base);
198 part->remote_openclose_args = NULL;
199 dev_err(xpc_chan, "can't register NOTIFY IRQ handler, " 208 dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
200 "errno=%d\n", -ret); 209 "errno=%d\n", -ret);
210 kfree(part->remote_openclose_args_base);
211 part->remote_openclose_args = NULL;
212 kfree(part->local_openclose_args_base);
213 part->local_openclose_args = NULL;
214 kfree(part->remote_GPs_base);
215 part->remote_GPs = NULL;
216 kfree(part->local_GPs_base);
217 part->local_GPs = NULL;
218 kfree(part->channels);
219 part->channels = NULL;
201 return xpcLackOfResources; 220 return xpcLackOfResources;
202 } 221 }
203 222
@@ -446,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
446 for (nentries = ch->local_nentries; nentries > 0; nentries--) { 465 for (nentries = ch->local_nentries; nentries > 0; nentries--) {
447 466
448 nbytes = nentries * ch->msg_size; 467 nbytes = nentries * ch->msg_size;
449 ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, 468 ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
450 GFP_KERNEL, 469 GFP_KERNEL,
451 &ch->local_msgqueue_base); 470 &ch->local_msgqueue_base);
452 if (ch->local_msgqueue == NULL) { 471 if (ch->local_msgqueue == NULL) {
453 continue; 472 continue;
454 } 473 }
455 memset(ch->local_msgqueue, 0, nbytes);
456 474
457 nbytes = nentries * sizeof(struct xpc_notify); 475 nbytes = nentries * sizeof(struct xpc_notify);
458 ch->notify_queue = kmalloc(nbytes, GFP_KERNEL); 476 ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
459 if (ch->notify_queue == NULL) { 477 if (ch->notify_queue == NULL) {
460 kfree(ch->local_msgqueue_base); 478 kfree(ch->local_msgqueue_base);
461 ch->local_msgqueue = NULL; 479 ch->local_msgqueue = NULL;
462 continue; 480 continue;
463 } 481 }
464 memset(ch->notify_queue, 0, nbytes);
465 482
466 spin_lock_irqsave(&ch->lock, irq_flags); 483 spin_lock_irqsave(&ch->lock, irq_flags);
467 if (nentries < ch->local_nentries) { 484 if (nentries < ch->local_nentries) {
@@ -501,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
501 for (nentries = ch->remote_nentries; nentries > 0; nentries--) { 518 for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
502 519
503 nbytes = nentries * ch->msg_size; 520 nbytes = nentries * ch->msg_size;
504 ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes, 521 ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
505 GFP_KERNEL, 522 GFP_KERNEL,
506 &ch->remote_msgqueue_base); 523 &ch->remote_msgqueue_base);
507 if (ch->remote_msgqueue == NULL) { 524 if (ch->remote_msgqueue == NULL) {
508 continue; 525 continue;
509 } 526 }
510 memset(ch->remote_msgqueue, 0, nbytes);
511 527
512 spin_lock_irqsave(&ch->lock, irq_flags); 528 spin_lock_irqsave(&ch->lock, irq_flags);
513 if (nentries < ch->remote_nentries) { 529 if (nentries < ch->remote_nentries) {
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 8cbf16432570..99b123a6421a 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -52,7 +52,6 @@
52#include <linux/syscalls.h> 52#include <linux/syscalls.h>
53#include <linux/cache.h> 53#include <linux/cache.h>
54#include <linux/interrupt.h> 54#include <linux/interrupt.h>
55#include <linux/slab.h>
56#include <linux/delay.h> 55#include <linux/delay.h>
57#include <linux/reboot.h> 56#include <linux/reboot.h>
58#include <linux/completion.h> 57#include <linux/completion.h>
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 88a730e6cfdb..94211429fd0c 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -81,6 +81,31 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
81 81
82 82
83/* 83/*
84 * Guarantee that the kmalloc'd memory is cacheline aligned.
85 */
86static void *
87xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
88{
89	/* see if kmalloc will give us cacheline aligned memory by default */
90 *base = kmalloc(size, flags);
91 if (*base == NULL) {
92 return NULL;
93 }
94 if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
95 return *base;
96 }
97 kfree(*base);
98
99 /* nope, we'll have to do it ourselves */
100 *base = kmalloc(size + L1_CACHE_BYTES, flags);
101 if (*base == NULL) {
102 return NULL;
103 }
104 return (void *) L1_CACHE_ALIGN((u64) *base);
105}
106
107
108/*
84 * Given a nasid, get the physical address of the partition's reserved page 109 * Given a nasid, get the physical address of the partition's reserved page
85 * for that nasid. This function returns 0 on any error. 110 * for that nasid. This function returns 0 on any error.
86 */ 111 */
@@ -1038,13 +1063,12 @@ xpc_discovery(void)
1038 remote_vars = (struct xpc_vars *) remote_rp; 1063 remote_vars = (struct xpc_vars *) remote_rp;
1039 1064
1040 1065
1041 discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words, 1066 discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
1042 GFP_KERNEL); 1067 GFP_KERNEL);
1043 if (discovered_nasids == NULL) { 1068 if (discovered_nasids == NULL) {
1044 kfree(remote_rp_base); 1069 kfree(remote_rp_base);
1045 return; 1070 return;
1046 } 1071 }
1047 memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words);
1048 1072
1049 rp = (struct xpc_rsvd_page *) xpc_rsvd_page; 1073 rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
1050 1074
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index e52831ed93eb..fa073cc4b565 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -15,6 +15,124 @@
15#include <asm/sn/pcidev.h> 15#include <asm/sn/pcidev.h>
16#include <asm/sn/pcibus_provider_defs.h> 16#include <asm/sn/pcibus_provider_defs.h>
17#include <asm/sn/tioce_provider.h> 17#include <asm/sn/tioce_provider.h>
18#include <asm/sn/sn2/sn_hwperf.h>
19
20/*
21 * 1/26/2006
22 *
23 * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe
24 * (taken from the above PV) before and after accessing tioce internal MMR's
25 * to avoid tioce lockups.
26 *
27 * The recipe as taken from the PV:
28 *
29 * if(mmr address < 0x45000) {
30 * if(mmr address == 0 or 0x80)
31 * mmr wrt or read address 0xc0
32 * else if(mmr address == 0x148 or 0x200)
33 * mmr wrt or read address 0x28
34 * else
35 * mmr wrt or read address 0x158
36 *
37 * do desired mmr access (rd or wrt)
38 *
39 * if(mmr address == 0x100)
40 * mmr wrt or read address 0x38
41 * mmr wrt or read address 0xb050
42 * } else
43 * do desired mmr access
44 *
45 * According to hw, we can use reads instead of writes to the above addresses
46 *
47 * Note this WAR can only be used for accessing internal MMR's in the
48 * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the
49 * "Local CE Registers and Memories" and "PCI Compatible Config Space" address
50 * spaces from table 2-1 of the "CE Programmer's Reference Overview" document.
51 *
52 * All registers defined in struct tioce will meet that criteria.
53 */
54
55static void inline
56tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
57{
58 u64 mmr_base;
59 u64 mmr_offset;
60
61 if (kern->ce_common->ce_rev != TIOCE_REV_A)
62 return;
63
64 mmr_base = kern->ce_common->ce_pcibus.bs_base;
65 mmr_offset = (u64)mmr_addr - mmr_base;
66
67 if (mmr_offset < 0x45000) {
68 u64 mmr_war_offset;
69
70 if (mmr_offset == 0 || mmr_offset == 0x80)
71 mmr_war_offset = 0xc0;
72 else if (mmr_offset == 0x148 || mmr_offset == 0x200)
73 mmr_war_offset = 0x28;
74 else
75 mmr_war_offset = 0x158;
76
77 readq_relaxed((void *)(mmr_base + mmr_war_offset));
78 }
79}
80
81static void inline
82tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
83{
84 u64 mmr_base;
85 u64 mmr_offset;
86
87 if (kern->ce_common->ce_rev != TIOCE_REV_A)
88 return;
89
90 mmr_base = kern->ce_common->ce_pcibus.bs_base;
91 mmr_offset = (u64)mmr_addr - mmr_base;
92
93 if (mmr_offset < 0x45000) {
94 if (mmr_offset == 0x100)
95 readq_relaxed((void *)(mmr_base + 0x38));
96 readq_relaxed((void *)(mmr_base + 0xb050));
97 }
98}
99
100/* load mmr contents into a variable */
101#define tioce_mmr_load(kern, mmrp, varp) do {\
102 tioce_mmr_war_pre(kern, mmrp); \
103 *(varp) = readq_relaxed(mmrp); \
104 tioce_mmr_war_post(kern, mmrp); \
105} while (0)
106
107/* store variable contents into mmr */
108#define tioce_mmr_store(kern, mmrp, varp) do {\
109 tioce_mmr_war_pre(kern, mmrp); \
110 writeq(*varp, mmrp); \
111 tioce_mmr_war_post(kern, mmrp); \
112} while (0)
113
114/* store immediate value into mmr */
115#define tioce_mmr_storei(kern, mmrp, val) do {\
116 tioce_mmr_war_pre(kern, mmrp); \
117 writeq(val, mmrp); \
118 tioce_mmr_war_post(kern, mmrp); \
119} while (0)
120
121/* set bits (immediate value) into mmr */
122#define tioce_mmr_seti(kern, mmrp, bits) do {\
123 u64 tmp; \
124 tioce_mmr_load(kern, mmrp, &tmp); \
125 tmp |= (bits); \
126 tioce_mmr_store(kern, mmrp, &tmp); \
127} while (0)
128
129/* clear bits (immediate value) into mmr */
130#define tioce_mmr_clri(kern, mmrp, bits) do { \
131 u64 tmp; \
132 tioce_mmr_load(kern, mmrp, &tmp); \
133 tmp &= ~(bits); \
134 tioce_mmr_store(kern, mmrp, &tmp); \
135} while (0)
18 136
19/** 137/**
20 * Bus address ranges for the 5 flavors of TIOCE DMA 138 * Bus address ranges for the 5 flavors of TIOCE DMA
@@ -62,9 +180,9 @@
62#define TIOCE_ATE_M40 2 180#define TIOCE_ATE_M40 2
63#define TIOCE_ATE_M40S 3 181#define TIOCE_ATE_M40S 3
64 182
65#define KB(x) ((x) << 10) 183#define KB(x) ((u64)(x) << 10)
66#define MB(x) ((x) << 20) 184#define MB(x) ((u64)(x) << 20)
67#define GB(x) ((x) << 30) 185#define GB(x) ((u64)(x) << 30)
68 186
69/** 187/**
70 * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode 188 * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode
@@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
151 int last; 269 int last;
152 int entries; 270 int entries;
153 int nates; 271 int nates;
154 int pagesize; 272 u64 pagesize;
155 u64 *ate_shadow; 273 u64 *ate_shadow;
156 u64 *ate_reg; 274 u64 *ate_reg;
157 u64 addr; 275 u64 addr;
@@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
228 346
229 ate = ATE_MAKE(addr, pagesize); 347 ate = ATE_MAKE(addr, pagesize);
230 ate_shadow[i + j] = ate; 348 ate_shadow[i + j] = ate;
231 writeq(ate, &ate_reg[i + j]); 349 tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate);
232 addr += pagesize; 350 addr += pagesize;
233 } 351 }
234 352
@@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr)
272 u64 tmp; 390 u64 tmp;
273 391
274 ce_kern->ce_port[port].dirmap_shadow = ct_upper; 392 ce_kern->ce_port[port].dirmap_shadow = ct_upper;
275 writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]); 393 tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
394 ct_upper);
276 tmp = ce_mmr->ce_ure_dir_map[port]; 395 tmp = ce_mmr->ce_ure_dir_map[port];
277 dma_ok = 1; 396 dma_ok = 1;
278 } else 397 } else
@@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
344 if (TIOCE_D32_ADDR(bus_addr)) { 463 if (TIOCE_D32_ADDR(bus_addr)) {
345 if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { 464 if (--ce_kern->ce_port[port].dirmap_refcnt == 0) {
346 ce_kern->ce_port[port].dirmap_shadow = 0; 465 ce_kern->ce_port[port].dirmap_shadow = 0;
347 writeq(0, &ce_mmr->ce_ure_dir_map[port]); 466 tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
467 0);
348 } 468 }
349 } else { 469 } else {
350 struct tioce_dmamap *map; 470 struct tioce_dmamap *map;
@@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
365 } else if (--map->refcnt == 0) { 485 } else if (--map->refcnt == 0) {
366 for (i = 0; i < map->ate_count; i++) { 486 for (i = 0; i < map->ate_count; i++) {
367 map->ate_shadow[i] = 0; 487 map->ate_shadow[i] = 0;
368 map->ate_hw[i] = 0; 488 tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0);
369 } 489 }
370 490
371 list_del(&map->ce_dmamap_list); 491 list_del(&map->ce_dmamap_list);
@@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
486 spin_unlock_irqrestore(&ce_kern->ce_lock, flags); 606 spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
487 607
488dma_map_done: 608dma_map_done:
489 if (mapaddr & barrier) 609 if (mapaddr && barrier)
490 mapaddr = tioce_dma_barrier(mapaddr, 1); 610 mapaddr = tioce_dma_barrier(mapaddr, 1);
491 611
492 return mapaddr; 612 return mapaddr;
@@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
541 soft->ce_pcibus.bs_persist_segment, 661 soft->ce_pcibus.bs_persist_segment,
542 soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); 662 soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0);
543 663
664 if (ret_stuff.v0)
665 panic("tioce_error_intr_handler: Fatal TIOCE error");
666
544 return IRQ_HANDLED; 667 return IRQ_HANDLED;
545} 668}
546 669
547/** 670/**
671 * tioce_reserve_m32 - reserve M32 ate's for the indicated address range
672 * @tioce_kernel: TIOCE context to reserve ate's for
673 * @base: starting bus address to reserve
674 * @limit: last bus address to reserve
675 *
676 * If base/limit falls within the range of bus space mapped through the
677 * M32 space, reserve the resources corresponding to the range.
678 */
679static void
680tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
681{
682 int ate_index, last_ate, ps;
683 struct tioce *ce_mmr;
684
685 if (!TIOCE_M32_ADDR(base))
686 return;
687
688 ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base;
689 ps = ce_kern->ce_ate3240_pagesize;
690 ate_index = ATE_PAGE(base, ps);
691 last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1;
692
693 if (ate_index < 64)
694 ate_index = 64;
695
696 while (ate_index <= last_ate) {
697 u64 ate;
698
699 ate = ATE_MAKE(0xdeadbeef, ps);
700 ce_kern->ce_ate3240_shadow[ate_index] = ate;
701 tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index],
702 ate);
703 ate_index++;
704 }
705}
706
707/**
548 * tioce_kern_init - init kernel structures related to a given TIOCE 708 * tioce_kern_init - init kernel structures related to a given TIOCE
549 * @tioce_common: ptr to a cached tioce_common struct that originated in prom 709 * @tioce_common: ptr to a cached tioce_common struct that originated in prom
550 */ static struct tioce_kernel * 710 */
711static struct tioce_kernel *
551tioce_kern_init(struct tioce_common *tioce_common) 712tioce_kern_init(struct tioce_common *tioce_common)
552{ 713{
553 int i; 714 int i;
715 int ps;
716 int dev;
554 u32 tmp; 717 u32 tmp;
718 unsigned int seg, bus;
555 struct tioce *tioce_mmr; 719 struct tioce *tioce_mmr;
556 struct tioce_kernel *tioce_kern; 720 struct tioce_kernel *tioce_kern;
557 721
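tioce_reserve_m32() above spans a bus-address window with ATE_PAGE() and ATE_NPAGES(), whose definitions are not part of this hunk. A hedged sketch of how such page-index helpers are conventionally written, only to make the index arithmetic concrete (names and bodies are stand-ins, not the driver's macros):

#include <stdint.h>

/* which ATE covers addr, for ATE page size ps */
static inline uint64_t ate_page(uint64_t addr, uint64_t ps)
{
	return addr / ps;
}

/* how many ATEs are needed to cover [addr, addr + len) */
static inline uint64_t ate_npages(uint64_t addr, uint64_t len, uint64_t ps)
{
	return (addr + len - 1) / ps - addr / ps + 1;
}

With ps = 256 KiB (set later in tioce_kern_init), last_ate = ate_index + ATE_NPAGES(base, limit - base + 1, ps) - 1 then works out to the index of the ATE covering limit.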
@@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tioce_common)
572 * here to use pci_read_config_xxx() so use the raw_pci_ops vector. 736 * here to use pci_read_config_xxx() so use the raw_pci_ops vector.
573 */ 737 */
574 738
575 raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment, 739 seg = tioce_common->ce_pcibus.bs_persist_segment;
576 tioce_common->ce_pcibus.bs_persist_busnum, 740 bus = tioce_common->ce_pcibus.bs_persist_busnum;
577 PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp); 741
742 raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
578 tioce_kern->ce_port1_secondary = (u8) tmp; 743 tioce_kern->ce_port1_secondary = (u8) tmp;
579 744
580 /* 745 /*
@@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tioce_common)
583 */ 748 */
584 749
585 tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; 750 tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
586 __sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK); 751 tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map,
587 __sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE); 752 CE_URE_PAGESIZE_MASK);
588 tioce_kern->ce_ate3240_pagesize = KB(256); 753 tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map,
754 CE_URE_256K_PAGESIZE);
755 ps = tioce_kern->ce_ate3240_pagesize = KB(256);
589 756
590 for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { 757 for (i = 0; i < TIOCE_NUM_M40_ATES; i++) {
591 tioce_kern->ce_ate40_shadow[i] = 0; 758 tioce_kern->ce_ate40_shadow[i] = 0;
592 writeq(0, &tioce_mmr->ce_ure_ate40[i]); 759 tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0);
593 } 760 }
594 761
595 for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { 762 for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) {
596 tioce_kern->ce_ate3240_shadow[i] = 0; 763 tioce_kern->ce_ate3240_shadow[i] = 0;
597 writeq(0, &tioce_mmr->ce_ure_ate3240[i]); 764 tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0);
765 }
766
767 /*
768 * Reserve ATE's corresponding to reserved address ranges. These
769 * include:
770 *
771 * Memory space covered by each PPB mem base/limit register
772 * Memory space covered by each PPB prefetch base/limit register
773 *
774 * These bus ranges are for pio (downstream) traffic only, and so
775 * cannot be used for DMA.
776 */
777
778 for (dev = 1; dev <= 2; dev++) {
779 u64 base, limit;
780
781 /* mem base/limit */
782
783 raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
784 PCI_MEMORY_BASE, 2, &tmp);
785 base = (u64)tmp << 16;
786
787 raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
788 PCI_MEMORY_LIMIT, 2, &tmp);
789 limit = (u64)tmp << 16;
790 limit |= 0xfffffUL;
791
792 if (base < limit)
793 tioce_reserve_m32(tioce_kern, base, limit);
794
795 /*
796 * prefetch mem base/limit. The tioce ppb's have 64-bit
797 * decoders, so read the upper portions w/o checking the
798 * attributes.
799 */
800
801 raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
802 PCI_PREF_MEMORY_BASE, 2, &tmp);
803 base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
804
805 raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
806 PCI_PREF_BASE_UPPER32, 4, &tmp);
807 base |= (u64)tmp << 32;
808
809 raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
810 PCI_PREF_MEMORY_LIMIT, 2, &tmp);
811
812 limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
813 limit |= 0xfffffUL;
814
815 raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
816 PCI_PREF_LIMIT_UPPER32, 4, &tmp);
817 limit |= (u64)tmp << 32;
818
819 if ((base < limit) && TIOCE_M32_ADDR(base))
820 tioce_reserve_m32(tioce_kern, base, limit);
598 } 821 }
599 822
600 return tioce_kern; 823 return tioce_kern;
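The reservation loop above leans on the standard PCI-to-PCI bridge window encoding: PCI_MEMORY_BASE/LIMIT carry address bits 31:20 in their upper twelve bits, so the base is the register shifted left by 16 and the limit additionally gets its low 20 bits set; the prefetchable pair is decoded the same way except that its low type bits are masked off (PCI_PREF_RANGE_MASK) and the upper 32 bits come from the *_UPPER32 registers. A small self-contained sketch of the non-prefetchable decode:

#include <stdint.h>

/* Decode a PPB non-prefetchable window from its two 16-bit config registers. */
static void ppb_mem_window(uint16_t base_reg, uint16_t limit_reg,
			   uint64_t *base, uint64_t *limit)
{
	/* bits 15:4 of each register are address bits 31:20 of the window */
	*base  = (uint64_t)base_reg << 16;
	*limit = ((uint64_t)limit_reg << 16) | 0xfffffULL; /* window ends on a 1 MiB boundary - 1 */
	/* the patch above treats the window as populated only when *base < *limit */
}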
@@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
614{ 837{
615 struct pcidev_info *pcidev_info; 838 struct pcidev_info *pcidev_info;
616 struct tioce_common *ce_common; 839 struct tioce_common *ce_common;
840 struct tioce_kernel *ce_kern;
617 struct tioce *ce_mmr; 841 struct tioce *ce_mmr;
618 u64 force_int_val; 842 u64 force_int_val;
619 843
@@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
629 853
630 ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; 854 ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
631 ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; 855 ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
856 ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
857
858 /*
859 * TIOCE Rev A workaround (PV 945826), force an interrupt by writing
860 * the TIO_INTx register directly (1/26/2006)
861 */
862 if (ce_common->ce_rev == TIOCE_REV_A) {
863 u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit);
864 u64 status;
865
866 tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status);
867 if (status & int_bit_mask) {
868 u64 force_irq = (1 << 8) | sn_irq_info->irq_irq;
869 u64 ctalk = sn_irq_info->irq_xtalkaddr;
870 u64 nasid, offset;
871
872 nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
873 offset = (ctalk & CTALK_NODE_OFFSET);
874 HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq);
875 }
876
877 return;
878 }
632 879
633 /* 880 /*
634 * irq_int_bit is originally set up by prom, and holds the interrupt 881 * irq_int_bit is originally set up by prom, and holds the interrupt
@@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
666 default: 913 default:
667 return; 914 return;
668 } 915 }
669 writeq(force_int_val, &ce_mmr->ce_adm_force_int); 916 tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val);
670} 917}
671 918
672/** 919/**
@@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
685{ 932{
686 struct pcidev_info *pcidev_info; 933 struct pcidev_info *pcidev_info;
687 struct tioce_common *ce_common; 934 struct tioce_common *ce_common;
935 struct tioce_kernel *ce_kern;
688 struct tioce *ce_mmr; 936 struct tioce *ce_mmr;
689 int bit; 937 int bit;
690 u64 vector; 938 u64 vector;
@@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
695 943
696 ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; 944 ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
697 ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; 945 ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
946 ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
698 947
699 bit = sn_irq_info->irq_int_bit; 948 bit = sn_irq_info->irq_int_bit;
700 949
701 __sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); 950 tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
702 vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT; 951 vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT;
703 vector |= sn_irq_info->irq_xtalkaddr; 952 vector |= sn_irq_info->irq_xtalkaddr;
704 writeq(vector, &ce_mmr->ce_adm_int_dest[bit]); 953 tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector);
705 __sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit)); 954 tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
706 955
707 tioce_force_interrupt(sn_irq_info); 956 tioce_force_interrupt(sn_irq_info);
708} 957}
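The retarget sequence above (set the mask bit, write the new destination, clear the mask bit) is the usual way to move an interrupt source without racing a delivery in flight. A generic sketch of the pattern, with illustrative register pointers rather than the TIOCE MMR accessors:

#include <stdint.h>

static void retarget_int(volatile uint64_t *mask_reg,
			 volatile uint64_t *dest_reg,
			 int bit, uint64_t new_dest)
{
	*mask_reg |=  (1ULL << bit);  /* mask: source cannot fire mid-update */
	*dest_reg  =  new_dest;       /* point the source at its new target  */
	*mask_reg &= ~(1ULL << bit);  /* unmask: deliveries resume           */
}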
@@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
721static void * 970static void *
722tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) 971tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
723{ 972{
973 int my_nasid;
974 cnodeid_t my_cnode, mem_cnode;
724 struct tioce_common *tioce_common; 975 struct tioce_common *tioce_common;
976 struct tioce_kernel *tioce_kern;
977 struct tioce *tioce_mmr;
725 978
726 /* 979 /*
727 * Allocate kernel bus soft and copy from prom. 980 * Allocate kernel bus soft and copy from prom.
@@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
734 memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); 987 memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common));
735 tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; 988 tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET;
736 989
737 if (tioce_kern_init(tioce_common) == NULL) { 990 tioce_kern = tioce_kern_init(tioce_common);
991 if (tioce_kern == NULL) {
738 kfree(tioce_common); 992 kfree(tioce_common);
739 return NULL; 993 return NULL;
740 } 994 }
741 995
996 /*
997 * Clear out any transient errors before registering the error
998 * interrupt handler.
999 */
1000
1001 tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
1002 tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
1003 tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
1004 ~0ULL);
1005 tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL);
1006
742 if (request_irq(SGI_PCIASIC_ERROR, 1007 if (request_irq(SGI_PCIASIC_ERROR,
743 tioce_error_intr_handler, 1008 tioce_error_intr_handler,
744 SA_SHIRQ, "TIOCE error", (void *)tioce_common)) 1009 SA_SHIRQ, "TIOCE error", (void *)tioce_common))
@@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
750 tioce_common->ce_pcibus.bs_persist_segment, 1015 tioce_common->ce_pcibus.bs_persist_segment,
751 tioce_common->ce_pcibus.bs_persist_busnum); 1016 tioce_common->ce_pcibus.bs_persist_busnum);
752 1017
1018 /*
1019 * identify closest nasid for memory allocations
1020 */
1021
1022 my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base);
1023 my_cnode = nasid_to_cnodeid(my_nasid);
1024
1025 if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) {
1026 printk(KERN_WARNING "tioce_bus_fixup: failed to find "
1027 "closest node with MEM to TIO node %d\n", my_cnode);
1028 mem_cnode = (cnodeid_t)-1; /* use any node */
1029 }
1030
1031 controller->node = mem_cnode;
1032
753 return tioce_common; 1033 return tioce_common;
754} 1034}
755 1035
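Recording mem_cnode in controller->node lets later node-aware allocations land on memory close to the TIOCE, with -1 meaning "no preference". A hedged sketch of how such a value is typically consumed (illustrative only, not code from this patch):

#include <linux/slab.h>

/* node is controller->node: the nearest memory node, or -1 for any node */
static void *alloc_near_bus(size_t size, int node)
{
	return kmalloc_node(size, GFP_KERNEL, node);
}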