Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/Makefile                     2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c               63
-rw-r--r--  arch/powerpc/kernel/cacheinfo.c                  4
-rw-r--r--  arch/powerpc/kernel/clock.c                     82
-rw-r--r--  arch/powerpc/kernel/cpu_setup_fsl_booke.S       54
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S           38
-rw-r--r--  arch/powerpc/kernel/cputable.c                  16
-rw-r--r--  arch/powerpc/kernel/crash.c                      1
-rw-r--r--  arch/powerpc/kernel/crash_dump.c                 8
-rw-r--r--  arch/powerpc/kernel/dma-iommu.c                  4
-rw-r--r--  arch/powerpc/kernel/dma.c                       10
-rw-r--r--  arch/powerpc/kernel/eeh.c                       49
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c               189
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c                     4
-rw-r--r--  arch/powerpc/kernel/entry_64.S                  12
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S            27
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S           242
-rw-r--r--  arch/powerpc/kernel/fpu.S                       16
-rw-r--r--  arch/powerpc/kernel/fsl_booke_entry_mapping.S    2
-rw-r--r--  arch/powerpc/kernel/ftrace.c                     1
-rw-r--r--  arch/powerpc/kernel/head_64.S                    1
-rw-r--r--  arch/powerpc/kernel/head_fsl_booke.S           266
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c              1
-rw-r--r--  arch/powerpc/kernel/idle_power7.S                1
-rw-r--r--  arch/powerpc/kernel/iomap.c                      1
-rw-r--r--  arch/powerpc/kernel/iommu.c                    157
-rw-r--r--  arch/powerpc/kernel/irq.c                       17
-rw-r--r--  arch/powerpc/kernel/kgdb.c                       1
-rw-r--r--  arch/powerpc/kernel/kvm.c                       41
-rw-r--r--  arch/powerpc/kernel/machine_kexec.c             14
-rw-r--r--  arch/powerpc/kernel/machine_kexec_64.c           6
-rw-r--r--  arch/powerpc/kernel/mce.c                      352
-rw-r--r--  arch/powerpc/kernel/mce_power.c                284
-rw-r--r--  arch/powerpc/kernel/misc_32.S                    9
-rw-r--r--  arch/powerpc/kernel/misc_64.S                    6
-rw-r--r--  arch/powerpc/kernel/paca.c                      37
-rw-r--r--  arch/powerpc/kernel/pci-common.c                 4
-rw-r--r--  arch/powerpc/kernel/pci_of_scan.c                4
-rw-r--r--  arch/powerpc/kernel/process.c                  188
-rw-r--r--  arch/powerpc/kernel/prom.c                      41
-rw-r--r--  arch/powerpc/kernel/prom_init.c                 22
-rw-r--r--  arch/powerpc/kernel/reloc_64.S                   5
-rw-r--r--  arch/powerpc/kernel/setup_32.c                   8
-rw-r--r--  arch/powerpc/kernel/setup_64.c                  50
-rw-r--r--  arch/powerpc/kernel/signal.c                     3
-rw-r--r--  arch/powerpc/kernel/signal_32.c                 40
-rw-r--r--  arch/powerpc/kernel/signal_64.c                 18
-rw-r--r--  arch/powerpc/kernel/smp-tbsync.c                 1
-rw-r--r--  arch/powerpc/kernel/smp.c                        9
-rw-r--r--  arch/powerpc/kernel/swsusp_booke.S              32
-rw-r--r--  arch/powerpc/kernel/syscalls.c                   1
-rw-r--r--  arch/powerpc/kernel/sysfs.c                    390
-rw-r--r--  arch/powerpc/kernel/time.c                      14
-rw-r--r--  arch/powerpc/kernel/traps.c                     72
-rw-r--r--  arch/powerpc/kernel/uprobes.c                    2
-rw-r--r--  arch/powerpc/kernel/vdso32/vdso32_wrapper.S      3
-rw-r--r--  arch/powerpc/kernel/vdso64/vdso64_wrapper.S      3
-rw-r--r--  arch/powerpc/kernel/vector.S                    10
-rw-r--r--  arch/powerpc/kernel/vio.c                       31
59 files changed, 2385 insertions(+), 584 deletions(-)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 445cb6e39d5b..fcc9a89a4695 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
39obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 39obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
40obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o 40obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
41obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o 41obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
42obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
42obj64-$(CONFIG_RELOCATABLE) += reloc_64.o 43obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
43obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o 44obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
44obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o 45obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
@@ -47,7 +48,6 @@ obj-$(CONFIG_ALTIVEC) += vecemu.o
47obj-$(CONFIG_PPC_970_NAP) += idle_power4.o 48obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
48obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o 49obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
49obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o 50obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o
50obj-$(CONFIG_PPC_CLOCK) += clock.o
51procfs-y := proc_powerpc.o 51procfs-y := proc_powerpc.o
52obj-$(CONFIG_PROC_FS) += $(procfs-y) 52obj-$(CONFIG_PROC_FS) += $(procfs-y)
53rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o 53rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d3de01066f7d..b5aacf72ae6f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -203,6 +203,15 @@ int main(void)
203 DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack)); 203 DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
204 DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack)); 204 DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
205 DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack)); 205 DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
206 DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr));
207
208 DEFINE(TCD_ESEL_NEXT,
209 offsetof(struct tlb_core_data, esel_next));
210 DEFINE(TCD_ESEL_MAX,
211 offsetof(struct tlb_core_data, esel_max));
212 DEFINE(TCD_ESEL_FIRST,
213 offsetof(struct tlb_core_data, esel_first));
214 DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock));
206#endif /* CONFIG_PPC_BOOK3E */ 215#endif /* CONFIG_PPC_BOOK3E */
207 216
208#ifdef CONFIG_PPC_STD_MMU_64 217#ifdef CONFIG_PPC_STD_MMU_64
@@ -232,6 +241,10 @@ int main(void)
232 DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); 241 DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
233#endif /* CONFIG_PPC_STD_MMU_64 */ 242#endif /* CONFIG_PPC_STD_MMU_64 */
234 DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); 243 DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
244#ifdef CONFIG_PPC_BOOK3S_64
245 DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
246 DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
247#endif
235 DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); 248 DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
236 DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); 249 DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
237 DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); 250 DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
@@ -425,18 +438,14 @@ int main(void)
425 DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid)); 438 DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
426 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); 439 DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
427 DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave)); 440 DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
428 DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fpr)); 441 DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr));
429 DEFINE(VCPU_FPSCR, offsetof(struct kvm_vcpu, arch.fpscr));
430#ifdef CONFIG_ALTIVEC 442#ifdef CONFIG_ALTIVEC
431 DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr)); 443 DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr));
432 DEFINE(VCPU_VSCR, offsetof(struct kvm_vcpu, arch.vscr));
433#endif
434#ifdef CONFIG_VSX
435 DEFINE(VCPU_VSRS, offsetof(struct kvm_vcpu, arch.vsr));
436#endif 444#endif
437 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); 445 DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
438 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); 446 DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
439 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); 447 DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
448 DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar));
440 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); 449 DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
441 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); 450 DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
442#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 451#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
@@ -484,16 +493,24 @@ int main(void)
484 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar)); 493 DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
485 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr)); 494 DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
486 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty)); 495 DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
496 DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
487#endif 497#endif
488#ifdef CONFIG_PPC_BOOK3S 498#ifdef CONFIG_PPC_BOOK3S
489 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id)); 499 DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
490 DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr)); 500 DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
491 DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr)); 501 DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
502 DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic));
503 DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb));
492 DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr)); 504 DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
493 DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr)); 505 DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));
494 DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor)); 506 DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
507 DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr));
495 DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl)); 508 DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
496 DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr)); 509 DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
510 DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx));
511 DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr));
512 DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx));
513 DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr));
497 DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); 514 DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
498 DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec)); 515 DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
499 DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires)); 516 DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
@@ -502,8 +519,10 @@ int main(void)
502 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded)); 519 DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
503 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr)); 520 DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
504 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc)); 521 DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
522 DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc));
505 DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar)); 523 DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
506 DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar)); 524 DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
525 DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier));
507 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb)); 526 DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
508 DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max)); 527 DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
509 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr)); 528 DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
@@ -511,20 +530,47 @@ int main(void)
511 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar)); 530 DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
512 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); 531 DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
513 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap)); 532 DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
514 DEFINE(VCPU_PTID, offsetof(struct kvm_vcpu, arch.ptid));
515 DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar)); 533 DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
516 DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr)); 534 DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
535 DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr));
536 DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb));
537 DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr));
538 DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr));
539 DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr));
540 DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr));
541 DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr));
542 DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
543 DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
544 DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
517 DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1)); 545 DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
518 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count)); 546 DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
519 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count)); 547 DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
520 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest)); 548 DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
521 DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads)); 549 DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
550 DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
522 DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset)); 551 DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
523 DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr)); 552 DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
524 DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr)); 553 DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
554 DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes));
525 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige)); 555 DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
526 DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv)); 556 DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
527 DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb)); 557 DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
558#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
559 DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar));
560 DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar));
561 DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr));
562 DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm));
563 DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr));
564 DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
565 DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
566 DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
567 DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
568 DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
569 DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
570 DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm));
571 DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm));
572 DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm));
573#endif
528 574
529#ifdef CONFIG_PPC_BOOK3S_64 575#ifdef CONFIG_PPC_BOOK3S_64
530#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE 576#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -589,6 +635,7 @@ int main(void)
589 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); 635 HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
590 HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); 636 HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
591 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); 637 HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
638 HSTATE_FIELD(HSTATE_PTID, ptid);
592 HSTATE_FIELD(HSTATE_MMCR, host_mmcr); 639 HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
593 HSTATE_FIELD(HSTATE_PMC, host_pmc); 640 HSTATE_FIELD(HSTATE_PMC, host_pmc);
594 HSTATE_FIELD(HSTATE_PURR, host_purr); 641 HSTATE_FIELD(HSTATE_PURR, host_purr);
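The asm-offsets.c hunks above follow the usual kbuild pattern: the file is compiled but never linked, and its generated assembly is post-processed into asm-offsets.h so that hand-written assembly (entry_64.S and exceptions-64s.S below) can use names such as PACA_IN_MCE or VCPU_TFHAR as plain numeric offsets. A minimal sketch of the mechanism, assuming the stock DEFINE() helper from include/linux/kbuild.h and a hypothetical cut-down structure:

#include <stddef.h>

/* Hypothetical, cut-down stand-in for struct paca_struct. */
struct paca_example {
	unsigned long emergency_sp;
	unsigned long mc_emergency_sp;	/* new field used by the MCE code */
	unsigned short in_mce;		/* new field used by the MCE code */
};

/*
 * Assumption: this mirrors DEFINE() from include/linux/kbuild.h. The "i"
 * constraint makes the compiler print the constant into the generated .s
 * file (build with -S; the output is not meant to be assembled), which
 * kbuild then greps into "#define PACA_IN_MCE <offset>".
 */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))

int main(void)
{
	DEFINE(PACAMCEMERGSP, offsetof(struct paca_example, mc_emergency_sp));
	DEFINE(PACA_IN_MCE,   offsetof(struct paca_example, in_mce));
	return 0;
}
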
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 654932727873..2912b8787aa4 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -12,7 +12,6 @@
12 12
13#include <linux/cpu.h> 13#include <linux/cpu.h>
14#include <linux/cpumask.h> 14#include <linux/cpumask.h>
15#include <linux/init.h>
16#include <linux/kernel.h> 15#include <linux/kernel.h>
17#include <linux/kobject.h> 16#include <linux/kobject.h>
18#include <linux/list.h> 17#include <linux/list.h>
@@ -794,6 +793,9 @@ static void remove_cache_dir(struct cache_dir *cache_dir)
794{ 793{
795 remove_index_dirs(cache_dir); 794 remove_index_dirs(cache_dir);
796 795
796 /* Remove cache dir from sysfs */
797 kobject_del(cache_dir->kobj);
798
797 kobject_put(cache_dir->kobj); 799 kobject_put(cache_dir->kobj);
798 800
799 kfree(cache_dir); 801 kfree(cache_dir);
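The kobject_del() added above takes the cache directory out of sysfs immediately, instead of leaving that to kobject_put(), which only tears the object down once the last reference is gone. A hedged sketch of the general lifetime pattern for a dynamically created kobject (the names below are illustrative, not taken from cacheinfo.c):

#include <linux/errno.h>
#include <linux/kobject.h>
#include <linux/slab.h>

static struct kobject *example_kobj;	/* hypothetical */

static int example_create(struct kobject *parent)
{
	example_kobj = kobject_create_and_add("cache", parent);
	return example_kobj ? 0 : -ENOMEM;
}

static void example_destroy(void)
{
	kobject_del(example_kobj);	/* remove the sysfs entry right now   */
	kobject_put(example_kobj);	/* drop our ref; freed when it hits 0 */
}
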
diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c
deleted file mode 100644
index a764b47791e8..000000000000
--- a/arch/powerpc/kernel/clock.c
+++ /dev/null
@@ -1,82 +0,0 @@
1/*
2 * Dummy clk implementations for powerpc.
3 * These need to be overridden in platform code.
4 */
5
6#include <linux/clk.h>
7#include <linux/err.h>
8#include <linux/errno.h>
9#include <linux/export.h>
10#include <asm/clk_interface.h>
11
12struct clk_interface clk_functions;
13
14struct clk *clk_get(struct device *dev, const char *id)
15{
16 if (clk_functions.clk_get)
17 return clk_functions.clk_get(dev, id);
18 return ERR_PTR(-ENOSYS);
19}
20EXPORT_SYMBOL(clk_get);
21
22void clk_put(struct clk *clk)
23{
24 if (clk_functions.clk_put)
25 clk_functions.clk_put(clk);
26}
27EXPORT_SYMBOL(clk_put);
28
29int clk_enable(struct clk *clk)
30{
31 if (clk_functions.clk_enable)
32 return clk_functions.clk_enable(clk);
33 return -ENOSYS;
34}
35EXPORT_SYMBOL(clk_enable);
36
37void clk_disable(struct clk *clk)
38{
39 if (clk_functions.clk_disable)
40 clk_functions.clk_disable(clk);
41}
42EXPORT_SYMBOL(clk_disable);
43
44unsigned long clk_get_rate(struct clk *clk)
45{
46 if (clk_functions.clk_get_rate)
47 return clk_functions.clk_get_rate(clk);
48 return 0;
49}
50EXPORT_SYMBOL(clk_get_rate);
51
52long clk_round_rate(struct clk *clk, unsigned long rate)
53{
54 if (clk_functions.clk_round_rate)
55 return clk_functions.clk_round_rate(clk, rate);
56 return -ENOSYS;
57}
58EXPORT_SYMBOL(clk_round_rate);
59
60int clk_set_rate(struct clk *clk, unsigned long rate)
61{
62 if (clk_functions.clk_set_rate)
63 return clk_functions.clk_set_rate(clk, rate);
64 return -ENOSYS;
65}
66EXPORT_SYMBOL(clk_set_rate);
67
68struct clk *clk_get_parent(struct clk *clk)
69{
70 if (clk_functions.clk_get_parent)
71 return clk_functions.clk_get_parent(clk);
72 return ERR_PTR(-ENOSYS);
73}
74EXPORT_SYMBOL(clk_get_parent);
75
76int clk_set_parent(struct clk *clk, struct clk *parent)
77{
78 if (clk_functions.clk_set_parent)
79 return clk_functions.clk_set_parent(clk, parent);
80 return -ENOSYS;
81}
82EXPORT_SYMBOL(clk_set_parent);
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
index bfb18c7290b7..cc2d8962e090 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
@@ -53,11 +53,57 @@ _GLOBAL(__e500_dcache_setup)
53 isync 53 isync
54 blr 54 blr
55 55
56/*
57 * FIXME - we haven't yet done testing to determine a reasonable default
58 * value for PW20_WAIT_IDLE_BIT.
59 */
60#define PW20_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */
61_GLOBAL(setup_pw20_idle)
62 mfspr r3, SPRN_PWRMGTCR0
63
64 /* Set PW20_WAIT bit, enable pw20 state*/
65 ori r3, r3, PWRMGTCR0_PW20_WAIT
66 li r11, PW20_WAIT_IDLE_BIT
67
68 /* Set Automatic PW20 Core Idle Count */
69 rlwimi r3, r11, PWRMGTCR0_PW20_ENT_SHIFT, PWRMGTCR0_PW20_ENT
70
71 mtspr SPRN_PWRMGTCR0, r3
72
73 blr
74
75/*
76 * FIXME - we haven't yet done testing to determine a reasonable default
77 * value for AV_WAIT_IDLE_BIT.
78 */
79#define AV_WAIT_IDLE_BIT 50 /* 1ms, TB frequency is 41.66MHZ */
80_GLOBAL(setup_altivec_idle)
81 mfspr r3, SPRN_PWRMGTCR0
82
83 /* Enable Altivec Idle */
84 oris r3, r3, PWRMGTCR0_AV_IDLE_PD_EN@h
85 li r11, AV_WAIT_IDLE_BIT
86
87 /* Set Automatic AltiVec Idle Count */
88 rlwimi r3, r11, PWRMGTCR0_AV_IDLE_CNT_SHIFT, PWRMGTCR0_AV_IDLE_CNT
89
90 mtspr SPRN_PWRMGTCR0, r3
91
92 blr
93
56_GLOBAL(__setup_cpu_e6500) 94_GLOBAL(__setup_cpu_e6500)
57 mflr r6 95 mflr r6
58#ifdef CONFIG_PPC64 96#ifdef CONFIG_PPC64
59 bl .setup_altivec_ivors 97 bl .setup_altivec_ivors
98 /* Touch IVOR42 only if the CPU supports E.HV category */
99 mfspr r10,SPRN_MMUCFG
100 rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
101 beq 1f
102 bl .setup_lrat_ivor
1031:
60#endif 104#endif
105 bl setup_pw20_idle
106 bl setup_altivec_idle
61 bl __setup_cpu_e5500 107 bl __setup_cpu_e5500
62 mtlr r6 108 mtlr r6
63 blr 109 blr
@@ -119,6 +165,14 @@ _GLOBAL(__setup_cpu_e5500)
119_GLOBAL(__restore_cpu_e6500) 165_GLOBAL(__restore_cpu_e6500)
120 mflr r5 166 mflr r5
121 bl .setup_altivec_ivors 167 bl .setup_altivec_ivors
168 /* Touch IVOR42 only if the CPU supports E.HV category */
169 mfspr r10,SPRN_MMUCFG
170 rlwinm. r10,r10,0,MMUCFG_LPIDSIZE
171 beq 1f
172 bl .setup_lrat_ivor
1731:
174 bl .setup_pw20_idle
175 bl .setup_altivec_idle
122 bl __restore_cpu_e5500 176 bl __restore_cpu_e5500
123 mtlr r5 177 mtlr r5
124 blr 178 blr
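Both new helpers do a read-modify-write of PWRMGTCR0: set the enable bit, then rlwimi a wait count into the corresponding idle-count field. A hedged C rendering of the PW20 case; the PWRMGTCR0_* bit positions below are illustrative stand-ins, the authoritative values live in asm/reg_booke.h:

#include <stdint.h>

#define PWRMGTCR0_PW20_WAIT		(1u << 14)	/* assumed bit   */
#define PWRMGTCR0_PW20_ENT_SHIFT	8		/* assumed shift */
#define PWRMGTCR0_PW20_ENT		(0x3fu << PWRMGTCR0_PW20_ENT_SHIFT)
#define PW20_WAIT_IDLE_BIT		50		/* ~1ms at a 41.66MHz timebase */

static uint32_t setup_pw20_idle_sketch(uint32_t pwrmgtcr0)
{
	pwrmgtcr0 |= PWRMGTCR0_PW20_WAIT;	/* enable the PW20 state */

	/* rlwimi equivalent: replace the idle-count field with the new value */
	pwrmgtcr0 &= ~PWRMGTCR0_PW20_ENT;
	pwrmgtcr0 |= (PW20_WAIT_IDLE_BIT << PWRMGTCR0_PW20_ENT_SHIFT) &
		     PWRMGTCR0_PW20_ENT;

	return pwrmgtcr0;
}

setup_altivec_idle follows the same shape with the AV_IDLE_PD_EN bit and the AV_IDLE_CNT field.
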
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 18b5b9cf8e37..37d1bb002aa9 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -29,7 +29,7 @@ _GLOBAL(__setup_cpu_power7)
29 mtspr SPRN_LPID,r0 29 mtspr SPRN_LPID,r0
30 mfspr r3,SPRN_LPCR 30 mfspr r3,SPRN_LPCR
31 bl __init_LPCR 31 bl __init_LPCR
32 bl __init_TLB 32 bl __init_tlb_power7
33 mtlr r11 33 mtlr r11
34 blr 34 blr
35 35
@@ -42,7 +42,7 @@ _GLOBAL(__restore_cpu_power7)
42 mtspr SPRN_LPID,r0 42 mtspr SPRN_LPID,r0
43 mfspr r3,SPRN_LPCR 43 mfspr r3,SPRN_LPCR
44 bl __init_LPCR 44 bl __init_LPCR
45 bl __init_TLB 45 bl __init_tlb_power7
46 mtlr r11 46 mtlr r11
47 blr 47 blr
48 48
@@ -59,7 +59,7 @@ _GLOBAL(__setup_cpu_power8)
59 oris r3, r3, LPCR_AIL_3@h 59 oris r3, r3, LPCR_AIL_3@h
60 bl __init_LPCR 60 bl __init_LPCR
61 bl __init_HFSCR 61 bl __init_HFSCR
62 bl __init_TLB 62 bl __init_tlb_power8
63 bl __init_PMU_HV 63 bl __init_PMU_HV
64 mtlr r11 64 mtlr r11
65 blr 65 blr
@@ -78,7 +78,7 @@ _GLOBAL(__restore_cpu_power8)
78 oris r3, r3, LPCR_AIL_3@h 78 oris r3, r3, LPCR_AIL_3@h
79 bl __init_LPCR 79 bl __init_LPCR
80 bl __init_HFSCR 80 bl __init_HFSCR
81 bl __init_TLB 81 bl __init_tlb_power8
82 bl __init_PMU_HV 82 bl __init_PMU_HV
83 mtlr r11 83 mtlr r11
84 blr 84 blr
@@ -134,15 +134,31 @@ __init_HFSCR:
134 mtspr SPRN_HFSCR,r3 134 mtspr SPRN_HFSCR,r3
135 blr 135 blr
136 136
137__init_TLB: 137/*
138 /* 138 * Clear the TLB using the specified IS form of tlbiel instruction
139 * Clear the TLB using the "IS 3" form of tlbiel instruction 139 * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
140 * (invalidate by congruence class). P7 has 128 CCs, P8 has 512 140 *
141 * so we just always do 512 141 * r3 = IS field
142 */ 142 */
143__init_tlb_power7:
144 li r3,0xc00 /* IS field = 0b11 */
145_GLOBAL(__flush_tlb_power7)
146 li r6,128
147 mtctr r6
148 mr r7,r3 /* IS field */
149 ptesync
1502: tlbiel r7
151 addi r7,r7,0x1000
152 bdnz 2b
153 ptesync
1541: blr
155
156__init_tlb_power8:
157 li r3,0xc00 /* IS field = 0b11 */
158_GLOBAL(__flush_tlb_power8)
143 li r6,512 159 li r6,512
144 mtctr r6 160 mtctr r6
145 li r7,0xc00 /* IS field = 0b11 */ 161 mr r7,r3 /* IS field */
146 ptesync 162 ptesync
1472: tlbiel r7 1632: tlbiel r7
148 addi r7,r7,0x1000 164 addi r7,r7,0x1000
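__init_tlb_power7/8 now just load the IS=0b11 selector into r3 and fall into the exported __flush_tlb_power7/8 entry points, which issue tlbiel once per congruence class (128 sets on POWER7, 512 on POWER8), stepping the set index by 0x1000 each iteration. A hedged C sketch of that loop; the inline asm is illustrative PowerPC-only code:

static void flush_tlb_sketch(unsigned long is_field, int nr_sets)
{
	unsigned long rb = is_field;		/* e.g. 0xc00 for IS=0b11 */
	int i;

	asm volatile("ptesync" : : : "memory");
	for (i = 0; i < nr_sets; i++) {
		asm volatile("tlbiel %0" : : "r" (rb));
		rb += 0x1000;			/* next congruence class  */
	}
	asm volatile("ptesync" : : : "memory");
}
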
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 597d954e5860..6c8dd5da4de5 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -71,6 +71,10 @@ extern void __restore_cpu_power7(void);
71extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); 71extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
72extern void __restore_cpu_power8(void); 72extern void __restore_cpu_power8(void);
73extern void __restore_cpu_a2(void); 73extern void __restore_cpu_a2(void);
74extern void __flush_tlb_power7(unsigned long inval_selector);
75extern void __flush_tlb_power8(unsigned long inval_selector);
76extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
77extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
74#endif /* CONFIG_PPC64 */ 78#endif /* CONFIG_PPC64 */
75#if defined(CONFIG_E500) 79#if defined(CONFIG_E500)
76extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); 80extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
@@ -440,6 +444,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
440 .oprofile_cpu_type = "ppc64/ibm-compat-v1", 444 .oprofile_cpu_type = "ppc64/ibm-compat-v1",
441 .cpu_setup = __setup_cpu_power7, 445 .cpu_setup = __setup_cpu_power7,
442 .cpu_restore = __restore_cpu_power7, 446 .cpu_restore = __restore_cpu_power7,
447 .flush_tlb = __flush_tlb_power7,
448 .machine_check_early = __machine_check_early_realmode_p7,
443 .platform = "power7", 449 .platform = "power7",
444 }, 450 },
445 { /* 2.07-compliant processor, i.e. Power8 "architected" mode */ 451 { /* 2.07-compliant processor, i.e. Power8 "architected" mode */
@@ -456,6 +462,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
456 .oprofile_cpu_type = "ppc64/ibm-compat-v1", 462 .oprofile_cpu_type = "ppc64/ibm-compat-v1",
457 .cpu_setup = __setup_cpu_power8, 463 .cpu_setup = __setup_cpu_power8,
458 .cpu_restore = __restore_cpu_power8, 464 .cpu_restore = __restore_cpu_power8,
465 .flush_tlb = __flush_tlb_power8,
466 .machine_check_early = __machine_check_early_realmode_p8,
459 .platform = "power8", 467 .platform = "power8",
460 }, 468 },
461 { /* Power7 */ 469 { /* Power7 */
@@ -474,6 +482,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
474 .oprofile_type = PPC_OPROFILE_POWER4, 482 .oprofile_type = PPC_OPROFILE_POWER4,
475 .cpu_setup = __setup_cpu_power7, 483 .cpu_setup = __setup_cpu_power7,
476 .cpu_restore = __restore_cpu_power7, 484 .cpu_restore = __restore_cpu_power7,
485 .flush_tlb = __flush_tlb_power7,
486 .machine_check_early = __machine_check_early_realmode_p7,
477 .platform = "power7", 487 .platform = "power7",
478 }, 488 },
479 { /* Power7+ */ 489 { /* Power7+ */
@@ -492,6 +502,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
492 .oprofile_type = PPC_OPROFILE_POWER4, 502 .oprofile_type = PPC_OPROFILE_POWER4,
493 .cpu_setup = __setup_cpu_power7, 503 .cpu_setup = __setup_cpu_power7,
494 .cpu_restore = __restore_cpu_power7, 504 .cpu_restore = __restore_cpu_power7,
505 .flush_tlb = __flush_tlb_power7,
506 .machine_check_early = __machine_check_early_realmode_p7,
495 .platform = "power7+", 507 .platform = "power7+",
496 }, 508 },
497 { /* Power8E */ 509 { /* Power8E */
@@ -510,6 +522,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
510 .oprofile_type = PPC_OPROFILE_INVALID, 522 .oprofile_type = PPC_OPROFILE_INVALID,
511 .cpu_setup = __setup_cpu_power8, 523 .cpu_setup = __setup_cpu_power8,
512 .cpu_restore = __restore_cpu_power8, 524 .cpu_restore = __restore_cpu_power8,
525 .flush_tlb = __flush_tlb_power8,
526 .machine_check_early = __machine_check_early_realmode_p8,
513 .platform = "power8", 527 .platform = "power8",
514 }, 528 },
515 { /* Power8 */ 529 { /* Power8 */
@@ -528,6 +542,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
528 .oprofile_type = PPC_OPROFILE_INVALID, 542 .oprofile_type = PPC_OPROFILE_INVALID,
529 .cpu_setup = __setup_cpu_power8, 543 .cpu_setup = __setup_cpu_power8,
530 .cpu_restore = __restore_cpu_power8, 544 .cpu_restore = __restore_cpu_power8,
545 .flush_tlb = __flush_tlb_power8,
546 .machine_check_early = __machine_check_early_realmode_p8,
531 .platform = "power8", 547 .platform = "power8",
532 }, 548 },
533 { /* Cell Broadband Engine */ 549 { /* Cell Broadband Engine */
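The two new cpu_spec members give generic code CPU-specific machine-check hooks: .machine_check_early points at the real-mode decoder and .flush_tlb at the __flush_tlb_power7/8 routines above. A hedged sketch of how they are consumed (the dispatcher added elsewhere in this series works along these lines; the function names here are illustrative):

#include <asm/cputable.h>
#include <asm/ptrace.h>

static long machine_check_early_sketch(struct pt_regs *regs)
{
	long handled = 0;

	/* e.g. __machine_check_early_realmode_p8 on POWER8 */
	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
		handled = cur_cpu_spec->machine_check_early(regs);

	return handled;
}

/* A decoder that finds a corrupted TLB can scrub every set: */
static void flush_all_tlb_sets_sketch(void)
{
	if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
		cur_cpu_spec->flush_tlb(0xc00);	/* IS=0b11, as in the asm above */
}
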
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index fdcd8f551aff..18d7c80ddeb9 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -17,7 +17,6 @@
17#include <linux/export.h> 17#include <linux/export.h>
18#include <linux/crash_dump.h> 18#include <linux/crash_dump.h>
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/init.h>
21#include <linux/irq.h> 20#include <linux/irq.h>
22#include <linux/types.h> 21#include <linux/types.h>
23 22
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 11c1d069d920..7a13f378ca2c 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -98,17 +98,19 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
98 size_t csize, unsigned long offset, int userbuf) 98 size_t csize, unsigned long offset, int userbuf)
99{ 99{
100 void *vaddr; 100 void *vaddr;
101 phys_addr_t paddr;
101 102
102 if (!csize) 103 if (!csize)
103 return 0; 104 return 0;
104 105
105 csize = min_t(size_t, csize, PAGE_SIZE); 106 csize = min_t(size_t, csize, PAGE_SIZE);
107 paddr = pfn << PAGE_SHIFT;
106 108
107 if ((min_low_pfn < pfn) && (pfn < max_pfn)) { 109 if (memblock_is_region_memory(paddr, csize)) {
108 vaddr = __va(pfn << PAGE_SHIFT); 110 vaddr = __va(paddr);
109 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); 111 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
110 } else { 112 } else {
111 vaddr = __ioremap(pfn << PAGE_SHIFT, PAGE_SIZE, 0); 113 vaddr = __ioremap(paddr, PAGE_SIZE, 0);
112 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf); 114 csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
113 iounmap(vaddr); 115 iounmap(vaddr);
114 } 116 }
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index e4897523de41..54d0116256f7 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -83,10 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask)
83 return 0; 83 return 0;
84 } 84 }
85 85
86 if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT)) { 86 if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
87 dev_info(dev, "Warning: IOMMU offset too big for device mask\n"); 87 dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
88 dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", 88 dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
89 mask, tbl->it_offset << IOMMU_PAGE_SHIFT); 89 mask, tbl->it_offset << tbl->it_page_shift);
90 return 0; 90 return 0;
91 } else 91 } else
92 return 1; 92 return 1;
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 8032b97ccdcb..ee78f6e49d64 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -191,12 +191,10 @@ EXPORT_SYMBOL(dma_direct_ops);
191 191
192#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) 192#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
193 193
194int dma_set_mask(struct device *dev, u64 dma_mask) 194int __dma_set_mask(struct device *dev, u64 dma_mask)
195{ 195{
196 struct dma_map_ops *dma_ops = get_dma_ops(dev); 196 struct dma_map_ops *dma_ops = get_dma_ops(dev);
197 197
198 if (ppc_md.dma_set_mask)
199 return ppc_md.dma_set_mask(dev, dma_mask);
200 if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL)) 198 if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
201 return dma_ops->set_dma_mask(dev, dma_mask); 199 return dma_ops->set_dma_mask(dev, dma_mask);
202 if (!dev->dma_mask || !dma_supported(dev, dma_mask)) 200 if (!dev->dma_mask || !dma_supported(dev, dma_mask))
@@ -204,6 +202,12 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
204 *dev->dma_mask = dma_mask; 202 *dev->dma_mask = dma_mask;
205 return 0; 203 return 0;
206} 204}
205int dma_set_mask(struct device *dev, u64 dma_mask)
206{
207 if (ppc_md.dma_set_mask)
208 return ppc_md.dma_set_mask(dev, dma_mask);
209 return __dma_set_mask(dev, dma_mask);
210}
207EXPORT_SYMBOL(dma_set_mask); 211EXPORT_SYMBOL(dma_set_mask);
208 212
209u64 dma_get_required_mask(struct device *dev) 213u64 dma_get_required_mask(struct device *dev)
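Factoring the body out as __dma_set_mask() keeps the ppc_md.dma_set_mask platform override in the driver-facing dma_set_mask() while letting arch-internal code program a device's mask on the underlying bus ops directly. A hedged usage sketch; the caller is hypothetical and assumes __dma_set_mask() is declared in asm/dma-mapping.h, as the non-static definition above suggests:

#include <linux/device.h>
#include <linux/dma-mapping.h>

static int set_bypass_mask_sketch(struct device *dev)
{
	/* go straight to the bus dma_map_ops, skipping the ppc_md override */
	return __dma_set_mask(dev, DMA_BIT_MASK(64));
}
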
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 4bd687d5e7aa..e7b76a6bf150 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -28,6 +28,7 @@
28#include <linux/pci.h> 28#include <linux/pci.h>
29#include <linux/proc_fs.h> 29#include <linux/proc_fs.h>
30#include <linux/rbtree.h> 30#include <linux/rbtree.h>
31#include <linux/reboot.h>
31#include <linux/seq_file.h> 32#include <linux/seq_file.h>
32#include <linux/spinlock.h> 33#include <linux/spinlock.h>
33#include <linux/export.h> 34#include <linux/export.h>
@@ -84,12 +85,12 @@
84#define EEH_MAX_FAILS 2100000 85#define EEH_MAX_FAILS 2100000
85 86
86/* Time to wait for a PCI slot to report status, in milliseconds */ 87/* Time to wait for a PCI slot to report status, in milliseconds */
87#define PCI_BUS_RESET_WAIT_MSEC (60*1000) 88#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)
88 89
89/* Platform dependent EEH operations */ 90/* Platform dependent EEH operations */
90struct eeh_ops *eeh_ops = NULL; 91struct eeh_ops *eeh_ops = NULL;
91 92
92int eeh_subsystem_enabled; 93bool eeh_subsystem_enabled = false;
93EXPORT_SYMBOL(eeh_subsystem_enabled); 94EXPORT_SYMBOL(eeh_subsystem_enabled);
94 95
95/* 96/*
@@ -364,7 +365,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
364 365
365 eeh_stats.total_mmio_ffs++; 366 eeh_stats.total_mmio_ffs++;
366 367
367 if (!eeh_subsystem_enabled) 368 if (!eeh_enabled())
368 return 0; 369 return 0;
369 370
370 if (!edev) { 371 if (!edev) {
@@ -747,6 +748,17 @@ int __exit eeh_ops_unregister(const char *name)
747 return -EEXIST; 748 return -EEXIST;
748} 749}
749 750
751static int eeh_reboot_notifier(struct notifier_block *nb,
752 unsigned long action, void *unused)
753{
754 eeh_set_enable(false);
755 return NOTIFY_DONE;
756}
757
758static struct notifier_block eeh_reboot_nb = {
759 .notifier_call = eeh_reboot_notifier,
760};
761
750/** 762/**
751 * eeh_init - EEH initialization 763 * eeh_init - EEH initialization
752 * 764 *
@@ -778,6 +790,14 @@ int eeh_init(void)
778 if (machine_is(powernv) && cnt++ <= 0) 790 if (machine_is(powernv) && cnt++ <= 0)
779 return ret; 791 return ret;
780 792
793 /* Register reboot notifier */
794 ret = register_reboot_notifier(&eeh_reboot_nb);
795 if (ret) {
796 pr_warn("%s: Failed to register notifier (%d)\n",
797 __func__, ret);
798 return ret;
799 }
800
781 /* call platform initialization function */ 801 /* call platform initialization function */
782 if (!eeh_ops) { 802 if (!eeh_ops) {
783 pr_warning("%s: Platform EEH operation not found\n", 803 pr_warning("%s: Platform EEH operation not found\n",
@@ -822,7 +842,7 @@ int eeh_init(void)
822 return ret; 842 return ret;
823 } 843 }
824 844
825 if (eeh_subsystem_enabled) 845 if (eeh_enabled())
826 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 846 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
827 else 847 else
828 pr_warning("EEH: No capable adapters found\n"); 848 pr_warning("EEH: No capable adapters found\n");
@@ -897,7 +917,7 @@ void eeh_add_device_late(struct pci_dev *dev)
897 struct device_node *dn; 917 struct device_node *dn;
898 struct eeh_dev *edev; 918 struct eeh_dev *edev;
899 919
900 if (!dev || !eeh_subsystem_enabled) 920 if (!dev || !eeh_enabled())
901 return; 921 return;
902 922
903 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 923 pr_debug("EEH: Adding device %s\n", pci_name(dev));
@@ -921,6 +941,13 @@ void eeh_add_device_late(struct pci_dev *dev)
921 eeh_sysfs_remove_device(edev->pdev); 941 eeh_sysfs_remove_device(edev->pdev);
922 edev->mode &= ~EEH_DEV_SYSFS; 942 edev->mode &= ~EEH_DEV_SYSFS;
923 943
944 /*
945 * We definitely should have the PCI device removed
946 * though it wasn't correctly. So we needn't call
947 * into error handler afterwards.
948 */
949 edev->mode |= EEH_DEV_NO_HANDLER;
950
924 edev->pdev = NULL; 951 edev->pdev = NULL;
925 dev->dev.archdata.edev = NULL; 952 dev->dev.archdata.edev = NULL;
926 } 953 }
@@ -998,7 +1025,7 @@ void eeh_remove_device(struct pci_dev *dev)
998{ 1025{
999 struct eeh_dev *edev; 1026 struct eeh_dev *edev;
1000 1027
1001 if (!dev || !eeh_subsystem_enabled) 1028 if (!dev || !eeh_enabled())
1002 return; 1029 return;
1003 edev = pci_dev_to_eeh_dev(dev); 1030 edev = pci_dev_to_eeh_dev(dev);
1004 1031
@@ -1023,6 +1050,14 @@ void eeh_remove_device(struct pci_dev *dev)
1023 else 1050 else
1024 edev->mode |= EEH_DEV_DISCONNECTED; 1051 edev->mode |= EEH_DEV_DISCONNECTED;
1025 1052
1053 /*
1054 * We're removing from the PCI subsystem, that means
1055 * the PCI device driver can't support EEH or not
1056 * well. So we rely on hotplug completely to do recovery
1057 * for the specific PCI device.
1058 */
1059 edev->mode |= EEH_DEV_NO_HANDLER;
1060
1026 eeh_addr_cache_rmv_dev(dev); 1061 eeh_addr_cache_rmv_dev(dev);
1027 eeh_sysfs_remove_device(dev); 1062 eeh_sysfs_remove_device(dev);
1028 edev->mode &= ~EEH_DEV_SYSFS; 1063 edev->mode &= ~EEH_DEV_SYSFS;
@@ -1030,7 +1065,7 @@ void eeh_remove_device(struct pci_dev *dev)
1030 1065
1031static int proc_eeh_show(struct seq_file *m, void *v) 1066static int proc_eeh_show(struct seq_file *m, void *v)
1032{ 1067{
1033 if (0 == eeh_subsystem_enabled) { 1068 if (!eeh_enabled()) {
1034 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1069 seq_printf(m, "EEH Subsystem is globally disabled\n");
1035 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1070 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
1036 } else { 1071 } else {
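With the flag turned into a bool, every test now goes through an accessor, and the new reboot notifier simply calls eeh_set_enable(false) so EEH recovery stops interfering once a reboot is under way. A hedged sketch of the accessors this hunk relies on, assuming they are thin wrappers in asm/eeh.h:

#include <linux/types.h>

extern bool eeh_subsystem_enabled;

static inline bool eeh_enabled(void)
{
	return eeh_subsystem_enabled;
}

static inline void eeh_set_enable(bool mode)
{
	eeh_subsystem_enabled = mode;
}
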
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 36bed5a12750..fdc679d309ec 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -217,7 +217,8 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
217 if (!driver) return NULL; 217 if (!driver) return NULL;
218 218
219 if (!driver->err_handler || 219 if (!driver->err_handler ||
220 !driver->err_handler->mmio_enabled) { 220 !driver->err_handler->mmio_enabled ||
221 (edev->mode & EEH_DEV_NO_HANDLER)) {
221 eeh_pcid_put(dev); 222 eeh_pcid_put(dev);
222 return NULL; 223 return NULL;
223 } 224 }
@@ -258,7 +259,8 @@ static void *eeh_report_reset(void *data, void *userdata)
258 eeh_enable_irq(dev); 259 eeh_enable_irq(dev);
259 260
260 if (!driver->err_handler || 261 if (!driver->err_handler ||
261 !driver->err_handler->slot_reset) { 262 !driver->err_handler->slot_reset ||
263 (edev->mode & EEH_DEV_NO_HANDLER)) {
262 eeh_pcid_put(dev); 264 eeh_pcid_put(dev);
263 return NULL; 265 return NULL;
264 } 266 }
@@ -297,7 +299,9 @@ static void *eeh_report_resume(void *data, void *userdata)
297 eeh_enable_irq(dev); 299 eeh_enable_irq(dev);
298 300
299 if (!driver->err_handler || 301 if (!driver->err_handler ||
300 !driver->err_handler->resume) { 302 !driver->err_handler->resume ||
303 (edev->mode & EEH_DEV_NO_HANDLER)) {
304 edev->mode &= ~EEH_DEV_NO_HANDLER;
301 eeh_pcid_put(dev); 305 eeh_pcid_put(dev);
302 return NULL; 306 return NULL;
303 } 307 }
@@ -358,9 +362,13 @@ static void *eeh_rmv_device(void *data, void *userdata)
358 */ 362 */
359 if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) 363 if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
360 return NULL; 364 return NULL;
365
361 driver = eeh_pcid_get(dev); 366 driver = eeh_pcid_get(dev);
362 if (driver && driver->err_handler) 367 if (driver) {
363 return NULL; 368 eeh_pcid_put(dev);
369 if (driver->err_handler)
370 return NULL;
371 }
364 372
365 /* Remove it from PCI subsystem */ 373 /* Remove it from PCI subsystem */
366 pr_debug("EEH: Removing %s without EEH sensitive driver\n", 374 pr_debug("EEH: Removing %s without EEH sensitive driver\n",
@@ -369,7 +377,9 @@ static void *eeh_rmv_device(void *data, void *userdata)
369 edev->mode |= EEH_DEV_DISCONNECTED; 377 edev->mode |= EEH_DEV_DISCONNECTED;
370 (*removed)++; 378 (*removed)++;
371 379
380 pci_lock_rescan_remove();
372 pci_stop_and_remove_bus_device(dev); 381 pci_stop_and_remove_bus_device(dev);
382 pci_unlock_rescan_remove();
373 383
374 return NULL; 384 return NULL;
375} 385}
@@ -416,10 +426,13 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
416 * into pcibios_add_pci_devices(). 426 * into pcibios_add_pci_devices().
417 */ 427 */
418 eeh_pe_state_mark(pe, EEH_PE_KEEP); 428 eeh_pe_state_mark(pe, EEH_PE_KEEP);
419 if (bus) 429 if (bus) {
430 pci_lock_rescan_remove();
420 pcibios_remove_pci_devices(bus); 431 pcibios_remove_pci_devices(bus);
421 else if (frozen_bus) 432 pci_unlock_rescan_remove();
433 } else if (frozen_bus) {
422 eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed); 434 eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
435 }
423 436
424 /* Reset the pci controller. (Asserts RST#; resets config space). 437 /* Reset the pci controller. (Asserts RST#; resets config space).
425 * Reconfigure bridges and devices. Don't try to bring the system 438 * Reconfigure bridges and devices. Don't try to bring the system
@@ -429,6 +442,8 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
429 if (rc) 442 if (rc)
430 return rc; 443 return rc;
431 444
445 pci_lock_rescan_remove();
446
432 /* Restore PE */ 447 /* Restore PE */
433 eeh_ops->configure_bridge(pe); 448 eeh_ops->configure_bridge(pe);
434 eeh_pe_restore_bars(pe); 449 eeh_pe_restore_bars(pe);
@@ -462,13 +477,14 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
462 pe->tstamp = tstamp; 477 pe->tstamp = tstamp;
463 pe->freeze_count = cnt; 478 pe->freeze_count = cnt;
464 479
480 pci_unlock_rescan_remove();
465 return 0; 481 return 0;
466} 482}
467 483
468/* The longest amount of time to wait for a pci device 484/* The longest amount of time to wait for a pci device
469 * to come back on line, in seconds. 485 * to come back on line, in seconds.
470 */ 486 */
471#define MAX_WAIT_FOR_RECOVERY 150 487#define MAX_WAIT_FOR_RECOVERY 300
472 488
473static void eeh_handle_normal_event(struct eeh_pe *pe) 489static void eeh_handle_normal_event(struct eeh_pe *pe)
474{ 490{
@@ -618,92 +634,103 @@ perm_error:
618 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); 634 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
619 635
620 /* Shut down the device drivers for good. */ 636 /* Shut down the device drivers for good. */
621 if (frozen_bus) 637 if (frozen_bus) {
638 pci_lock_rescan_remove();
622 pcibios_remove_pci_devices(frozen_bus); 639 pcibios_remove_pci_devices(frozen_bus);
640 pci_unlock_rescan_remove();
641 }
623} 642}
624 643
625static void eeh_handle_special_event(void) 644static void eeh_handle_special_event(void)
626{ 645{
627 struct eeh_pe *pe, *phb_pe; 646 struct eeh_pe *pe, *phb_pe;
628 struct pci_bus *bus; 647 struct pci_bus *bus;
629 struct pci_controller *hose, *tmp; 648 struct pci_controller *hose;
630 unsigned long flags; 649 unsigned long flags;
631 int rc = 0; 650 int rc;
632 651
633 /*
634 * The return value from next_error() has been classified as follows.
635 * It might be good to enumerate them. However, next_error() is only
636 * supported by PowerNV platform for now. So it would be fine to use
637 * integer directly:
638 *
639 * 4 - Dead IOC 3 - Dead PHB
640 * 2 - Fenced PHB 1 - Frozen PE
641 * 0 - No error found
642 *
643 */
644 rc = eeh_ops->next_error(&pe);
645 if (rc <= 0)
646 return;
647 652
648 switch (rc) { 653 do {
649 case 4: 654 rc = eeh_ops->next_error(&pe);
650 /* Mark all PHBs in dead state */ 655
651 eeh_serialize_lock(&flags); 656 switch (rc) {
652 list_for_each_entry_safe(hose, tmp, 657 case EEH_NEXT_ERR_DEAD_IOC:
653 &hose_list, list_node) { 658 /* Mark all PHBs in dead state */
654 phb_pe = eeh_phb_pe_get(hose); 659 eeh_serialize_lock(&flags);
655 if (!phb_pe) continue; 660
656 661 /* Purge all events */
657 eeh_pe_state_mark(phb_pe, 662 eeh_remove_event(NULL);
658 EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); 663
664 list_for_each_entry(hose, &hose_list, list_node) {
665 phb_pe = eeh_phb_pe_get(hose);
666 if (!phb_pe) continue;
667
668 eeh_pe_state_mark(phb_pe,
669 EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
670 }
671
672 eeh_serialize_unlock(flags);
673
674 break;
675 case EEH_NEXT_ERR_FROZEN_PE:
676 case EEH_NEXT_ERR_FENCED_PHB:
677 case EEH_NEXT_ERR_DEAD_PHB:
678 /* Mark the PE in fenced state */
679 eeh_serialize_lock(&flags);
680
681 /* Purge all events of the PHB */
682 eeh_remove_event(pe);
683
684 if (rc == EEH_NEXT_ERR_DEAD_PHB)
685 eeh_pe_state_mark(pe,
686 EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
687 else
688 eeh_pe_state_mark(pe,
689 EEH_PE_ISOLATED | EEH_PE_RECOVERING);
690
691 eeh_serialize_unlock(flags);
692
693 break;
694 case EEH_NEXT_ERR_NONE:
695 return;
696 default:
697 pr_warn("%s: Invalid value %d from next_error()\n",
698 __func__, rc);
699 return;
659 } 700 }
660 eeh_serialize_unlock(flags);
661
662 /* Purge all events */
663 eeh_remove_event(NULL);
664 break;
665 case 3:
666 case 2:
667 case 1:
668 /* Mark the PE in fenced state */
669 eeh_serialize_lock(&flags);
670 if (rc == 3)
671 eeh_pe_state_mark(pe,
672 EEH_PE_ISOLATED | EEH_PE_PHB_DEAD);
673 else
674 eeh_pe_state_mark(pe,
675 EEH_PE_ISOLATED | EEH_PE_RECOVERING);
676 eeh_serialize_unlock(flags);
677
678 /* Purge all events of the PHB */
679 eeh_remove_event(pe);
680 break;
681 default:
682 pr_err("%s: Invalid value %d from next_error()\n",
683 __func__, rc);
684 return;
685 }
686 701
687 /* 702 /*
688 * For fenced PHB and frozen PE, it's handled as normal 703 * For fenced PHB and frozen PE, it's handled as normal
689 * event. We have to remove the affected PHBs for dead 704 * event. We have to remove the affected PHBs for dead
690 * PHB and IOC 705 * PHB and IOC
691 */ 706 */
692 if (rc == 2 || rc == 1) 707 if (rc == EEH_NEXT_ERR_FROZEN_PE ||
693 eeh_handle_normal_event(pe); 708 rc == EEH_NEXT_ERR_FENCED_PHB) {
694 else { 709 eeh_handle_normal_event(pe);
695 list_for_each_entry_safe(hose, tmp, 710 } else {
696 &hose_list, list_node) { 711 pci_lock_rescan_remove();
697 phb_pe = eeh_phb_pe_get(hose); 712 list_for_each_entry(hose, &hose_list, list_node) {
698 if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD)) 713 phb_pe = eeh_phb_pe_get(hose);
699 continue; 714 if (!phb_pe ||
700 715 !(phb_pe->state & EEH_PE_PHB_DEAD))
701 bus = eeh_pe_bus_get(phb_pe); 716 continue;
702 /* Notify all devices that they're about to go down. */ 717
703 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); 718 /* Notify all devices to be down */
704 pcibios_remove_pci_devices(bus); 719 bus = eeh_pe_bus_get(phb_pe);
720 eeh_pe_dev_traverse(pe,
721 eeh_report_failure, NULL);
722 pcibios_remove_pci_devices(bus);
723 }
724 pci_unlock_rescan_remove();
705 } 725 }
706 } 726
727 /*
728 * If we have detected dead IOC, we needn't proceed
729 * any more since all PHBs would have been removed
730 */
731 if (rc == EEH_NEXT_ERR_DEAD_IOC)
732 break;
733 } while (rc != EEH_NEXT_ERR_NONE);
707} 734}
708 735
709/** 736/**
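The rewritten eeh_handle_special_event() drops the bare integers next_error() used to return (4 = dead IOC, 3 = dead PHB, 2 = fenced PHB, 1 = frozen PE, 0 = none, per the deleted comment) in favour of named EEH_NEXT_ERR_* constants, and keeps polling until the platform reports no further errors. A hedged skeleton of that control flow; the constants' values live in asm/eeh.h, and the marking/recovery body is elided:

static void handle_special_event_sketch(void)
{
	struct eeh_pe *pe;
	int rc;

	do {
		rc = eeh_ops->next_error(&pe);
		if (rc == EEH_NEXT_ERR_NONE)
			return;

		/* ... mark the PE/PHB state, then recover or remove devices ... */

		/* A dead IOC takes every PHB with it; nothing left to poll. */
		if (rc == EEH_NEXT_ERR_DEAD_IOC)
			break;
	} while (rc != EEH_NEXT_ERR_NONE);
}
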
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index f9450537e335..f0c353fa655a 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -25,7 +25,6 @@
25#include <linux/delay.h> 25#include <linux/delay.h>
26#include <linux/export.h> 26#include <linux/export.h>
27#include <linux/gfp.h> 27#include <linux/gfp.h>
28#include <linux/init.h>
29#include <linux/kernel.h> 28#include <linux/kernel.h>
30#include <linux/pci.h> 29#include <linux/pci.h>
31#include <linux/string.h> 30#include <linux/string.h>
@@ -737,6 +736,9 @@ static void *eeh_restore_one_device_bars(void *data, void *flag)
737 else 736 else
738 eeh_restore_device_bars(edev, dn); 737 eeh_restore_device_bars(edev, dn);
739 738
739 if (eeh_ops->restore_config)
740 eeh_ops->restore_config(dn);
741
740 return NULL; 742 return NULL;
741} 743}
742 744
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index bbfb0294b354..662c6dd98072 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -664,8 +664,16 @@ _GLOBAL(ret_from_except_lite)
664 bl .restore_interrupts 664 bl .restore_interrupts
665 SCHEDULE_USER 665 SCHEDULE_USER
666 b .ret_from_except_lite 666 b .ret_from_except_lite
667 6672:
6682: bl .save_nvgprs 668#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
669 andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
670 bne 3f /* only restore TM if nothing else to do */
671 addi r3,r1,STACK_FRAME_OVERHEAD
672 bl .restore_tm_state
673 b restore
6743:
675#endif
676 bl .save_nvgprs
669 bl .restore_interrupts 677 bl .restore_interrupts
670 addi r3,r1,STACK_FRAME_OVERHEAD 678 addi r3,r1,STACK_FRAME_OVERHEAD
671 bl .do_notify_resume 679 bl .do_notify_resume
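The new test in ret_from_except_lite checks whether TM state restoration is the only outstanding user-work bit; if so it calls restore_tm_state() and skips the heavier save_nvgprs/do_notify_resume path. A hedged C rendering of just that test; the TIF bit values below are illustrative, not the real assignments:

#include <stdbool.h>

#define _TIF_SIGPENDING		(1u << 1)	/* assumed bit */
#define _TIF_NEED_RESCHED	(1u << 2)	/* assumed bit */
#define _TIF_RESTORE_TM		(1u << 5)	/* assumed bit */
#define _TIF_USER_WORK_MASK	(_TIF_SIGPENDING | _TIF_NEED_RESCHED | _TIF_RESTORE_TM)

/* Called once some user-work bit is known to be set (the "andi." above). */
static bool only_tm_restore_pending(unsigned long ti_flags)
{
	return (ti_flags & _TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM) == 0;
}
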
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index e7751561fd1d..063b65dd4f27 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -308,6 +308,7 @@ interrupt_base_book3e: /* fake trap */
308 EXCEPTION_STUB(0x2e0, guest_doorbell_crit) 308 EXCEPTION_STUB(0x2e0, guest_doorbell_crit)
309 EXCEPTION_STUB(0x300, hypercall) 309 EXCEPTION_STUB(0x300, hypercall)
310 EXCEPTION_STUB(0x320, ehpriv) 310 EXCEPTION_STUB(0x320, ehpriv)
311 EXCEPTION_STUB(0x340, lrat_error)
311 312
312 .globl interrupt_end_book3e 313 .globl interrupt_end_book3e
313interrupt_end_book3e: 314interrupt_end_book3e:
@@ -677,6 +678,17 @@ kernel_dbg_exc:
677 bl .unknown_exception 678 bl .unknown_exception
678 b .ret_from_except 679 b .ret_from_except
679 680
681/* LRAT Error interrupt */
682 START_EXCEPTION(lrat_error);
683 NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
684 PROLOG_ADDITION_NONE)
685 EXCEPTION_COMMON(0x340, PACA_EXGEN, INTS_KEEP)
686 addi r3,r1,STACK_FRAME_OVERHEAD
687 bl .save_nvgprs
688 INTS_RESTORE_HARD
689 bl .unknown_exception
690 b .ret_from_except
691
680/* 692/*
681 * An interrupt came in while soft-disabled; We mark paca->irq_happened 693 * An interrupt came in while soft-disabled; We mark paca->irq_happened
682 * accordingly and if the interrupt is level sensitive, we hard disable 694 * accordingly and if the interrupt is level sensitive, we hard disable
@@ -859,6 +871,7 @@ BAD_STACK_TRAMPOLINE(0x2e0)
859BAD_STACK_TRAMPOLINE(0x300) 871BAD_STACK_TRAMPOLINE(0x300)
860BAD_STACK_TRAMPOLINE(0x310) 872BAD_STACK_TRAMPOLINE(0x310)
861BAD_STACK_TRAMPOLINE(0x320) 873BAD_STACK_TRAMPOLINE(0x320)
874BAD_STACK_TRAMPOLINE(0x340)
862BAD_STACK_TRAMPOLINE(0x400) 875BAD_STACK_TRAMPOLINE(0x400)
863BAD_STACK_TRAMPOLINE(0x500) 876BAD_STACK_TRAMPOLINE(0x500)
864BAD_STACK_TRAMPOLINE(0x600) 877BAD_STACK_TRAMPOLINE(0x600)
@@ -1055,12 +1068,9 @@ skpinv: addi r6,r6,1 /* Increment */
1055 mtspr SPRN_MAS0,r3 1068 mtspr SPRN_MAS0,r3
1056 tlbre 1069 tlbre
1057 mfspr r6,SPRN_MAS1 1070 mfspr r6,SPRN_MAS1
1058 rlwinm r6,r6,0,2,0 /* clear IPROT */ 1071 rlwinm r6,r6,0,2,31 /* clear IPROT and VALID */
1059 mtspr SPRN_MAS1,r6 1072 mtspr SPRN_MAS1,r6
1060 tlbwe 1073 tlbwe
1061
1062 /* Invalidate TLB1 */
1063 PPC_TLBILX_ALL(0,R0)
1064 sync 1074 sync
1065 isync 1075 isync
1066 1076
@@ -1114,12 +1124,9 @@ skpinv: addi r6,r6,1 /* Increment */
1114 mtspr SPRN_MAS0,r4 1124 mtspr SPRN_MAS0,r4
1115 tlbre 1125 tlbre
1116 mfspr r5,SPRN_MAS1 1126 mfspr r5,SPRN_MAS1
1117 rlwinm r5,r5,0,2,0 /* clear IPROT */ 1127 rlwinm r5,r5,0,2,31 /* clear IPROT and VALID */
1118 mtspr SPRN_MAS1,r5 1128 mtspr SPRN_MAS1,r5
1119 tlbwe 1129 tlbwe
1120
1121 /* Invalidate TLB1 */
1122 PPC_TLBILX_ALL(0,R0)
1123 sync 1130 sync
1124 isync 1131 isync
1125 1132
@@ -1414,3 +1421,7 @@ _GLOBAL(setup_ehv_ivors)
1414 SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */ 1421 SET_IVOR(38, 0x2c0) /* Guest Processor Doorbell */
1415 SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */ 1422 SET_IVOR(39, 0x2e0) /* Guest Processor Doorbell Crit/MC */
1416 blr 1423 blr
1424
1425_GLOBAL(setup_lrat_ivor)
1426 SET_IVOR(42, 0x340) /* LRAT Error */
1427 blr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9f905e40922e..38d507306a11 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -155,8 +155,30 @@ machine_check_pSeries_1:
155 */ 155 */
156 HMT_MEDIUM_PPR_DISCARD 156 HMT_MEDIUM_PPR_DISCARD
157 SET_SCRATCH0(r13) /* save r13 */ 157 SET_SCRATCH0(r13) /* save r13 */
158#ifdef CONFIG_PPC_P7_NAP
159BEGIN_FTR_SECTION
160 /* Running native on arch 2.06 or later, check if we are
161 * waking up from nap. We only handle no state loss and
162 * supervisor state loss. We do -not- handle hypervisor
163 * state loss at this time.
164 */
165 mfspr r13,SPRN_SRR1
166 rlwinm. r13,r13,47-31,30,31
167 beq 9f
168
169 /* waking up from powersave (nap) state */
170 cmpwi cr1,r13,2
171 /* Total loss of HV state is fatal. let's just stay stuck here */
172 bgt cr1,.
1739:
174END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
175#endif /* CONFIG_PPC_P7_NAP */
158 EXCEPTION_PROLOG_0(PACA_EXMC) 176 EXCEPTION_PROLOG_0(PACA_EXMC)
177BEGIN_FTR_SECTION
178 b machine_check_pSeries_early
179FTR_SECTION_ELSE
159 b machine_check_pSeries_0 180 b machine_check_pSeries_0
181ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
160 182
161 . = 0x300 183 . = 0x300
162 .globl data_access_pSeries 184 .globl data_access_pSeries
@@ -405,6 +427,64 @@ denorm_exception_hv:
405 427
406 .align 7 428 .align 7
407 /* moved from 0x200 */ 429 /* moved from 0x200 */
430machine_check_pSeries_early:
431BEGIN_FTR_SECTION
432 EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
433 /*
434 * Register contents:
435 * R13 = PACA
436 * R9 = CR
437 * Original R9 to R13 is saved on PACA_EXMC
438 *
439 * Switch to mc_emergency stack and handle re-entrancy (though we
440 * currently don't test for overflow). Save MCE registers srr1,
441 * srr0, dar and dsisr and then set ME=1
442 *
443 * We use paca->in_mce to check whether this is the first entry or
444 * nested machine check. We increment paca->in_mce to track nested
445 * machine checks.
446 *
447 * If this is the first entry then set stack pointer to
448 * paca->mc_emergency_sp, otherwise r1 is already pointing to
449 * stack frame on mc_emergency stack.
450 *
451 * NOTE: We are here with MSR_ME=0 (off), which means we risk a
452 * checkstop if we get another machine check exception before we do
453 * rfid with MSR_ME=1.
454 */
455 mr r11,r1 /* Save r1 */
456 lhz r10,PACA_IN_MCE(r13)
457 cmpwi r10,0 /* Are we in nested machine check */
458 bne 0f /* Yes, we are. */
459 /* First machine check entry */
460 ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
4610: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
462 addi r10,r10,1 /* increment paca->in_mce */
463 sth r10,PACA_IN_MCE(r13)
464 std r11,GPR1(r1) /* Save r1 on the stack. */
465 std r11,0(r1) /* make stack chain pointer */
466 mfspr r11,SPRN_SRR0 /* Save SRR0 */
467 std r11,_NIP(r1)
468 mfspr r11,SPRN_SRR1 /* Save SRR1 */
469 std r11,_MSR(r1)
470 mfspr r11,SPRN_DAR /* Save DAR */
471 std r11,_DAR(r1)
472 mfspr r11,SPRN_DSISR /* Save DSISR */
473 std r11,_DSISR(r1)
474 std r9,_CCR(r1) /* Save CR in stackframe */
475 /* Save r9 through r13 from EXMC save area to stack frame. */
476 EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
477 mfmsr r11 /* get MSR value */
478 ori r11,r11,MSR_ME /* turn on ME bit */
479 ori r11,r11,MSR_RI /* turn on RI bit */
480 ld r12,PACAKBASE(r13) /* get high part of &label */
481 LOAD_HANDLER(r12, machine_check_handle_early)
482 mtspr SPRN_SRR0,r12
483 mtspr SPRN_SRR1,r11
484 rfid
485 b . /* prevent speculative execution */
486END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
487
408machine_check_pSeries: 488machine_check_pSeries:
409 .globl machine_check_fwnmi 489 .globl machine_check_fwnmi
410machine_check_fwnmi: 490machine_check_fwnmi:
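machine_check_pSeries_early runs with the MMU off and MSR_ME initially clear, so the first (non-nested) machine check switches to the dedicated mc_emergency stack and paca->in_mce counts the nesting depth; the MACHINE_CHECK_HANDLER_WINDUP macro later in this file decrements it again. A hedged C rendering of the stack selection; the structure is a cut-down stand-in for the paca fields added in asm-offsets.c above:

struct mce_paca_sketch {
	unsigned long mc_emergency_sp;	/* PACAMCEMERGSP */
	unsigned short in_mce;		/* PACA_IN_MCE   */
};

static unsigned long mce_pick_stack_sketch(struct mce_paca_sketch *paca,
					   unsigned long r1,
					   unsigned long int_frame_size)
{
	unsigned long sp;

	if (paca->in_mce == 0)
		sp = paca->mc_emergency_sp;	/* first entry: switch stacks */
	else
		sp = r1;			/* nested MCE: stay on it     */

	sp -= int_frame_size;			/* allocate an exception frame */
	paca->in_mce++;				/* undone by the windup macro  */

	return sp;
}
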
@@ -688,30 +768,6 @@ kvmppc_skip_Hinterrupt:
688 768
689 STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) 769 STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception)
690 770
691 /*
692 * Machine check is different because we use a different
693 * save area: PACA_EXMC instead of PACA_EXGEN.
694 */
695 .align 7
696 .globl machine_check_common
697machine_check_common:
698
699 mfspr r10,SPRN_DAR
700 std r10,PACA_EXGEN+EX_DAR(r13)
701 mfspr r10,SPRN_DSISR
702 stw r10,PACA_EXGEN+EX_DSISR(r13)
703 EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
704 FINISH_NAP
705 DISABLE_INTS
706 ld r3,PACA_EXGEN+EX_DAR(r13)
707 lwz r4,PACA_EXGEN+EX_DSISR(r13)
708 std r3,_DAR(r1)
709 std r4,_DSISR(r1)
710 bl .save_nvgprs
711 addi r3,r1,STACK_FRAME_OVERHEAD
712 bl .machine_check_exception
713 b .ret_from_except
714
715 STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ) 771 STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
716 STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt) 772 STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, .timer_interrupt)
717 STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt) 773 STD_EXCEPTION_COMMON(0x980, hdecrementer, .hdec_interrupt)
@@ -1080,6 +1136,30 @@ unrecov_user_slb:
1080#endif /* __DISABLED__ */ 1136#endif /* __DISABLED__ */
1081 1137
1082 1138
1139 /*
1140 * Machine check is different because we use a different
1141 * save area: PACA_EXMC instead of PACA_EXGEN.
1142 */
1143 .align 7
1144 .globl machine_check_common
1145machine_check_common:
1146
1147 mfspr r10,SPRN_DAR
1148 std r10,PACA_EXGEN+EX_DAR(r13)
1149 mfspr r10,SPRN_DSISR
1150 stw r10,PACA_EXGEN+EX_DSISR(r13)
1151 EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
1152 FINISH_NAP
1153 DISABLE_INTS
1154 ld r3,PACA_EXGEN+EX_DAR(r13)
1155 lwz r4,PACA_EXGEN+EX_DSISR(r13)
1156 std r3,_DAR(r1)
1157 std r4,_DSISR(r1)
1158 bl .save_nvgprs
1159 addi r3,r1,STACK_FRAME_OVERHEAD
1160 bl .machine_check_exception
1161 b .ret_from_except
1162
1083 .align 7 1163 .align 7
1084 .globl alignment_common 1164 .globl alignment_common
1085alignment_common: 1165alignment_common:
@@ -1263,6 +1343,120 @@ _GLOBAL(opal_mc_secondary_handler)
1263#endif /* CONFIG_PPC_POWERNV */ 1343#endif /* CONFIG_PPC_POWERNV */
1264 1344
1265 1345
1346#define MACHINE_CHECK_HANDLER_WINDUP \
1347 /* Clear MSR_RI before setting SRR0 and SRR1. */\
1348 li r0,MSR_RI; \
1349 mfmsr r9; /* get MSR value */ \
1350 andc r9,r9,r0; \
1351 mtmsrd r9,1; /* Clear MSR_RI */ \
1352 /* Move original SRR0 and SRR1 into the respective regs */ \
1353 ld r9,_MSR(r1); \
1354 mtspr SPRN_SRR1,r9; \
1355 ld r3,_NIP(r1); \
1356 mtspr SPRN_SRR0,r3; \
1357 ld r9,_CTR(r1); \
1358 mtctr r9; \
1359 ld r9,_XER(r1); \
1360 mtxer r9; \
1361 ld r9,_LINK(r1); \
1362 mtlr r9; \
1363 REST_GPR(0, r1); \
1364 REST_8GPRS(2, r1); \
1365 REST_GPR(10, r1); \
1366 ld r11,_CCR(r1); \
1367 mtcr r11; \
1368 /* Decrement paca->in_mce. */ \
1369 lhz r12,PACA_IN_MCE(r13); \
1370 subi r12,r12,1; \
1371 sth r12,PACA_IN_MCE(r13); \
1372 REST_GPR(11, r1); \
1373 REST_2GPRS(12, r1); \
1374 /* restore original r1. */ \
1375 ld r1,GPR1(r1)
1376
1377 /*
1378 * Handle machine check early in real mode. We come here with
1379 * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
1380 */
1381 .align 7
1382 .globl machine_check_handle_early
1383machine_check_handle_early:
1384 std r0,GPR0(r1) /* Save r0 */
1385 EXCEPTION_PROLOG_COMMON_3(0x200)
1386 bl .save_nvgprs
1387 addi r3,r1,STACK_FRAME_OVERHEAD
1388 bl .machine_check_early
1389 ld r12,_MSR(r1)
1390#ifdef CONFIG_PPC_P7_NAP
1391 /*
1392 * Check if thread was in power saving mode. We come here when any
1393 * of the following is true:
1394 * a. thread wasn't in power saving mode
1395 * b. thread was in power saving mode with no state loss or
1396 * supervisor state loss
1397 *
1398 * Go back to nap again if (b) is true.
1399 */
1400 rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
 1401 beq 4f /* No, it wasn't */
1402 /* Thread was in power saving mode. Go back to nap again. */
1403 cmpwi r11,2
1404 bne 3f
1405 /* Supervisor state loss */
1406 li r0,1
1407 stb r0,PACA_NAPSTATELOST(r13)
14083: bl .machine_check_queue_event
1409 MACHINE_CHECK_HANDLER_WINDUP
1410 GET_PACA(r13)
1411 ld r1,PACAR1(r13)
1412 b .power7_enter_nap_mode
14134:
1414#endif
1415 /*
1416 * Check if we are coming from hypervisor userspace. If yes then we
1417 * continue in host kernel in V mode to deliver the MC event.
1418 */
1419 rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
1420 beq 5f
1421 andi. r11,r12,MSR_PR /* See if coming from user. */
1422 bne 9f /* continue in V mode if we are. */
1423
14245:
1425#ifdef CONFIG_KVM_BOOK3S_64_HV
1426 /*
 1427 * We are coming from kernel context. Check if we are coming from
 1428 * a guest. If yes, then we can continue. We will fall through
 1429 * do_kvm_200->kvmppc_interrupt to deliver the MC event to the guest.
1430 */
1431 lbz r11,HSTATE_IN_GUEST(r13)
1432 cmpwi r11,0 /* Check if coming from guest */
1433 bne 9f /* continue if we are. */
1434#endif
1435 /*
1436 * At this point we are not sure about what context we come from.
1437 * Queue up the MCE event and return from the interrupt.
 1438 * But before that, check if this is an unrecoverable exception.
1439 * If yes, then stay on emergency stack and panic.
1440 */
1441 andi. r11,r12,MSR_RI
1442 bne 2f
14431: addi r3,r1,STACK_FRAME_OVERHEAD
1444 bl .unrecoverable_exception
1445 b 1b
14462:
1447 /*
1448 * Return from MC interrupt.
1449 * Queue up the MCE event so that we can log it later, while
1450 * returning from kernel or opal call.
1451 */
1452 bl .machine_check_queue_event
1453 MACHINE_CHECK_HANDLER_WINDUP
1454 rfid
14559:
1456 /* Deliver the machine check to host kernel in V mode. */
1457 MACHINE_CHECK_HANDLER_WINDUP
1458 b machine_check_pSeries
1459
1266/* 1460/*
1267 * r13 points to the PACA, r9 contains the saved CR, 1461 * r13 points to the PACA, r9 contains the saved CR,
1268 * r12 contain the saved SRR1, SRR0 is still ready for return 1462 * r12 contain the saved SRR1, SRR0 is still ready for return
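A minimal C sketch of the re-entrancy bookkeeping that the machine_check_pSeries_early prologue above performs in assembly. The paca fields in_mce and mc_emergency_sp come from this series; the helper name and exact types are invented for illustration and are not part of the patch.

	/* Sketch only: mirrors the "first entry vs. nested MCE" stack selection.
	 * Assumes struct paca_struct from asm/paca.h with the fields added here. */
	static unsigned long mce_pick_stack(struct paca_struct *paca,
					    unsigned long saved_r1)
	{
		unsigned long sp;

		if (paca->in_mce == 0)
			sp = (unsigned long)paca->mc_emergency_sp; /* first entry */
		else
			sp = saved_r1;		/* nested MCE: r1 already on the MC stack */

		sp -= INT_FRAME_SIZE;		/* allocate a stack frame */
		paca->in_mce++;			/* track nesting depth */
		return sp;
	}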
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index f7f5b8bed68f..9ad236e5d2c9 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -81,6 +81,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
81#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 81#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
82 82
83/* 83/*
84 * Enable use of the FPU, and VSX if possible, for the caller.
85 */
86_GLOBAL(fp_enable)
87 mfmsr r3
88 ori r3,r3,MSR_FP
89#ifdef CONFIG_VSX
90BEGIN_FTR_SECTION
91 oris r3,r3,MSR_VSX@h
92END_FTR_SECTION_IFSET(CPU_FTR_VSX)
93#endif
94 SYNC
95 MTMSRD(r3)
96 isync /* (not necessary for arch 2.02 and later) */
97 blr
98
99/*
84 * Load state from memory into FP registers including FPSCR. 100 * Load state from memory into FP registers including FPSCR.
85 * Assumes the caller has enabled FP in the MSR. 101 * Assumes the caller has enabled FP in the MSR.
86 */ 102 */
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
index a92c79be2728..f22e7e44fbf3 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/fsl_booke_entry_mapping.S
@@ -176,6 +176,8 @@ skpinv: addi r6,r6,1 /* Increment */
176/* 7. Jump to KERNELBASE mapping */ 176/* 7. Jump to KERNELBASE mapping */
177 lis r6,(KERNELBASE & ~0xfff)@h 177 lis r6,(KERNELBASE & ~0xfff)@h
178 ori r6,r6,(KERNELBASE & ~0xfff)@l 178 ori r6,r6,(KERNELBASE & ~0xfff)@l
179 rlwinm r7,r25,0,0x03ffffff
180 add r6,r7,r6
179 181
180#elif defined(ENTRY_MAPPING_KEXEC_SETUP) 182#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
181/* 183/*
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index 9b27b293a922..b0ded97ee4e1 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -74,6 +74,7 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
74 */ 74 */
75static int test_24bit_addr(unsigned long ip, unsigned long addr) 75static int test_24bit_addr(unsigned long ip, unsigned long addr)
76{ 76{
77 addr = ppc_function_entry((void *)addr);
77 78
78 /* use the create_branch to verify that this offset can be branched */ 79 /* use the create_branch to verify that this offset can be branched */
79 return create_branch((unsigned int *)ip, addr, 0); 80 return create_branch((unsigned int *)ip, addr, 0);
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 4f0946de2d5c..b7363bd42452 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -23,6 +23,7 @@
23 */ 23 */
24 24
25#include <linux/threads.h> 25#include <linux/threads.h>
26#include <linux/init.h>
26#include <asm/reg.h> 27#include <asm/reg.h>
27#include <asm/page.h> 28#include <asm/page.h>
28#include <asm/mmu.h> 29#include <asm/mmu.h>
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index f45726a1d963..b497188a94a1 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -65,29 +65,78 @@ _ENTRY(_start);
65 nop 65 nop
66 66
67 /* Translate device tree address to physical, save in r30/r31 */ 67 /* Translate device tree address to physical, save in r30/r31 */
68 mfmsr r16 68 bl get_phys_addr
69 mfspr r17,SPRN_PID 69 mr r30,r3
70 rlwinm r17,r17,16,0x3fff0000 /* turn PID into MAS6[SPID] */ 70 mr r31,r4
71 rlwimi r17,r16,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */
72 mtspr SPRN_MAS6,r17
73
74 tlbsx 0,r3 /* must succeed */
75
76 mfspr r16,SPRN_MAS1
77 mfspr r20,SPRN_MAS3
78 rlwinm r17,r16,25,0x1f /* r17 = log2(page size) */
79 li r18,1024
80 slw r18,r18,r17 /* r18 = page size */
81 addi r18,r18,-1
82 and r19,r3,r18 /* r19 = page offset */
83 andc r31,r20,r18 /* r31 = page base */
84 or r31,r31,r19 /* r31 = devtree phys addr */
85 mfspr r30,SPRN_MAS7
86 71
87 li r25,0 /* phys kernel start (low) */ 72 li r25,0 /* phys kernel start (low) */
88 li r24,0 /* CPU number */ 73 li r24,0 /* CPU number */
89 li r23,0 /* phys kernel start (high) */ 74 li r23,0 /* phys kernel start (high) */
90 75
76#ifdef CONFIG_RELOCATABLE
77 LOAD_REG_ADDR_PIC(r3, _stext) /* Get our current runtime base */
78
79 /* Translate _stext address to physical, save in r23/r25 */
80 bl get_phys_addr
81 mr r23,r3
82 mr r25,r4
83
84 bl 0f
850: mflr r8
86 addis r3,r8,(is_second_reloc - 0b)@ha
87 lwz r19,(is_second_reloc - 0b)@l(r3)
88
89 /* Check if this is the second relocation. */
90 cmpwi r19,1
91 bne 1f
92
93 /*
 94 * For the second relocation we already have the real memstart_addr
 95 * from the device tree, so we will map PAGE_OFFSET to memstart_addr;
 96 * the virtual address of the kernel start should then be:
 97 * PAGE_OFFSET + (kernstart_addr - memstart_addr)
 98 * Since the offset between kernstart_addr and memstart_addr never
 99 * exceeds 1G, we can just use their lower 32 bits for the
 100 * calculation.
101 */
102 lis r3,PAGE_OFFSET@h
103
104 addis r4,r8,(kernstart_addr - 0b)@ha
105 addi r4,r4,(kernstart_addr - 0b)@l
106 lwz r5,4(r4)
107
108 addis r6,r8,(memstart_addr - 0b)@ha
109 addi r6,r6,(memstart_addr - 0b)@l
110 lwz r7,4(r6)
111
112 subf r5,r7,r5
113 add r3,r3,r5
114 b 2f
115
1161:
117 /*
 118 * We have the runtime (virtual) address of our base.
 119 * We calculate our offset from the nearest 64M page boundary.
120 * We could map the 64M page we belong to at PAGE_OFFSET and
121 * get going from there.
122 */
123 lis r4,KERNELBASE@h
124 ori r4,r4,KERNELBASE@l
125 rlwinm r6,r25,0,0x3ffffff /* r6 = PHYS_START % 64M */
126 rlwinm r5,r4,0,0x3ffffff /* r5 = KERNELBASE % 64M */
127 subf r3,r5,r6 /* r3 = r6 - r5 */
128 add r3,r4,r3 /* Required Virtual Address */
129
1302: bl relocate
131
132 /*
133 * For the second relocation, we already set the right tlb entries
134 * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
135 */
136 cmpwi r19,1
137 beq set_ivor
138#endif
139
91/* We try to not make any assumptions about how the boot loader 140/* We try to not make any assumptions about how the boot loader
92 * setup or used the TLBs. We invalidate all mappings from the 141 * setup or used the TLBs. We invalidate all mappings from the
93 * boot loader and load a single entry in TLB1[0] to map the 142 * boot loader and load a single entry in TLB1[0] to map the
@@ -113,6 +162,7 @@ _ENTRY(__early_start)
113#include "fsl_booke_entry_mapping.S" 162#include "fsl_booke_entry_mapping.S"
114#undef ENTRY_MAPPING_BOOT_SETUP 163#undef ENTRY_MAPPING_BOOT_SETUP
115 164
165set_ivor:
116 /* Establish the interrupt vector offsets */ 166 /* Establish the interrupt vector offsets */
117 SET_IVOR(0, CriticalInput); 167 SET_IVOR(0, CriticalInput);
118 SET_IVOR(1, MachineCheck); 168 SET_IVOR(1, MachineCheck);
@@ -166,8 +216,7 @@ _ENTRY(__early_start)
166 /* Check to see if we're the second processor, and jump 216 /* Check to see if we're the second processor, and jump
167 * to the secondary_start code if so 217 * to the secondary_start code if so
168 */ 218 */
169 lis r24, boot_cpuid@h 219 LOAD_REG_ADDR_PIC(r24, boot_cpuid)
170 ori r24, r24, boot_cpuid@l
171 lwz r24, 0(r24) 220 lwz r24, 0(r24)
172 cmpwi r24, -1 221 cmpwi r24, -1
173 mfspr r24,SPRN_PIR 222 mfspr r24,SPRN_PIR
@@ -197,6 +246,18 @@ _ENTRY(__early_start)
197 246
198 bl early_init 247 bl early_init
199 248
249#ifdef CONFIG_RELOCATABLE
250 mr r3,r30
251 mr r4,r31
252#ifdef CONFIG_PHYS_64BIT
253 mr r5,r23
254 mr r6,r25
255#else
256 mr r5,r25
257#endif
258 bl relocate_init
259#endif
260
200#ifdef CONFIG_DYNAMIC_MEMSTART 261#ifdef CONFIG_DYNAMIC_MEMSTART
201 lis r3,kernstart_addr@ha 262 lis r3,kernstart_addr@ha
202 la r3,kernstart_addr@l(r3) 263 la r3,kernstart_addr@l(r3)
@@ -856,6 +917,33 @@ KernelSPE:
856#endif /* CONFIG_SPE */ 917#endif /* CONFIG_SPE */
857 918
858/* 919/*
 920 * Translate the effective address in r3 to a physical address. The physical
 921 * address is returned in r3 (upper 32 bits) and r4 (lower 32 bits).
922 */
923get_phys_addr:
924 mfmsr r8
925 mfspr r9,SPRN_PID
926 rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */
927 rlwimi r9,r8,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */
928 mtspr SPRN_MAS6,r9
929
930 tlbsx 0,r3 /* must succeed */
931
932 mfspr r8,SPRN_MAS1
933 mfspr r12,SPRN_MAS3
934 rlwinm r9,r8,25,0x1f /* r9 = log2(page size) */
935 li r10,1024
936 slw r10,r10,r9 /* r10 = page size */
937 addi r10,r10,-1
938 and r11,r3,r10 /* r11 = page offset */
939 andc r4,r12,r10 /* r4 = page base */
 940 or r4,r4,r11 /* r4 = phys addr */
941#ifdef CONFIG_PHYS_64BIT
942 mfspr r3,SPRN_MAS7
943#endif
944 blr
945
946/*
859 * Global functions 947 * Global functions
860 */ 948 */
861 949
@@ -1057,24 +1145,36 @@ _GLOBAL(__flush_disable_L1)
1057/* When we get here, r24 needs to hold the CPU # */ 1145/* When we get here, r24 needs to hold the CPU # */
1058 .globl __secondary_start 1146 .globl __secondary_start
1059__secondary_start: 1147__secondary_start:
1060 lis r3,__secondary_hold_acknowledge@h 1148 LOAD_REG_ADDR_PIC(r3, tlbcam_index)
1061 ori r3,r3,__secondary_hold_acknowledge@l 1149 lwz r3,0(r3)
1062 stw r24,0(r3)
1063
1064 li r3,0
1065 mr r4,r24 /* Why? */
1066 bl call_setup_cpu
1067
1068 lis r3,tlbcam_index@ha
1069 lwz r3,tlbcam_index@l(r3)
1070 mtctr r3 1150 mtctr r3
1071 li r26,0 /* r26 safe? */ 1151 li r26,0 /* r26 safe? */
1072 1152
1153 bl switch_to_as1
1154 mr r27,r3 /* tlb entry */
1073 /* Load each CAM entry */ 1155 /* Load each CAM entry */
10741: mr r3,r26 11561: mr r3,r26
1075 bl loadcam_entry 1157 bl loadcam_entry
1076 addi r26,r26,1 1158 addi r26,r26,1
1077 bdnz 1b 1159 bdnz 1b
1160 mr r3,r27 /* tlb entry */
1161 LOAD_REG_ADDR_PIC(r4, memstart_addr)
1162 lwz r4,0(r4)
1163 mr r5,r25 /* phys kernel start */
1164 rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */
1165 subf r4,r5,r4 /* memstart_addr - phys kernel start */
1166 li r5,0 /* no device tree */
1167 li r6,0 /* not boot cpu */
1168 bl restore_to_as0
1169
1170
1171 lis r3,__secondary_hold_acknowledge@h
1172 ori r3,r3,__secondary_hold_acknowledge@l
1173 stw r24,0(r3)
1174
1175 li r3,0
1176 mr r4,r24 /* Why? */
1177 bl call_setup_cpu
1078 1178
1079 /* get current_thread_info and current */ 1179 /* get current_thread_info and current */
1080 lis r1,secondary_ti@ha 1180 lis r1,secondary_ti@ha
@@ -1111,6 +1211,112 @@ __secondary_hold_acknowledge:
1111#endif 1211#endif
1112 1212
1113/* 1213/*
 1214 * Create a TLB entry with the same effective and physical address as
 1215 * the TLB entry used by the currently running code, but set TS to 1.
 1216 * Then switch to address space 1. It returns with r3 set to
 1217 * the ESEL of the newly created TLB entry.
1218 */
1219_GLOBAL(switch_to_as1)
1220 mflr r5
1221
 1222 /* Find an unused entry */
1223 mfspr r3,SPRN_TLB1CFG
1224 andi. r3,r3,0xfff
1225 mfspr r4,SPRN_PID
1226 rlwinm r4,r4,16,0x3fff0000 /* turn PID into MAS6[SPID] */
1227 mtspr SPRN_MAS6,r4
12281: lis r4,0x1000 /* Set MAS0(TLBSEL) = 1 */
1229 addi r3,r3,-1
1230 rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
1231 mtspr SPRN_MAS0,r4
1232 tlbre
1233 mfspr r4,SPRN_MAS1
1234 andis. r4,r4,MAS1_VALID@h
1235 bne 1b
1236
1237 /* Get the tlb entry used by the current running code */
1238 bl 0f
12390: mflr r4
1240 tlbsx 0,r4
1241
1242 mfspr r4,SPRN_MAS1
1243 ori r4,r4,MAS1_TS /* Set the TS = 1 */
1244 mtspr SPRN_MAS1,r4
1245
1246 mfspr r4,SPRN_MAS0
1247 rlwinm r4,r4,0,~MAS0_ESEL_MASK
1248 rlwimi r4,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
1249 mtspr SPRN_MAS0,r4
1250 tlbwe
1251 isync
1252 sync
1253
1254 mfmsr r4
1255 ori r4,r4,MSR_IS | MSR_DS
1256 mtspr SPRN_SRR0,r5
1257 mtspr SPRN_SRR1,r4
1258 sync
1259 rfi
1260
1261/*
 1262 * Restore to address space 0 and also invalidate the TLB entry created
 1263 * by switch_to_as1.
 1264 * r3 - the TLB entry which should be invalidated
 1265 * r4 - __pa(PAGE_OFFSET in AS1) - __pa(PAGE_OFFSET in AS0)
 1266 * r5 - device tree virtual address. If r4 is 0, r5 is ignored.
 1267 * r6 - non-zero if this is the boot cpu
 1268 */
1269_GLOBAL(restore_to_as0)
1270 mflr r0
1271
1272 bl 0f
12730: mflr r9
1274 addi r9,r9,1f - 0b
1275
1276 /*
1277 * We may map the PAGE_OFFSET in AS0 to a different physical address,
1278 * so we need calculate the right jump and device tree address based
1279 * on the offset passed by r4.
1280 */
1281 add r9,r9,r4
1282 add r5,r5,r4
1283 add r0,r0,r4
1284
12852: mfmsr r7
1286 li r8,(MSR_IS | MSR_DS)
1287 andc r7,r7,r8
1288
1289 mtspr SPRN_SRR0,r9
1290 mtspr SPRN_SRR1,r7
1291 sync
1292 rfi
1293
1294 /* Invalidate the temporary tlb entry for AS1 */
12951: lis r9,0x1000 /* Set MAS0(TLBSEL) = 1 */
1296 rlwimi r9,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */
1297 mtspr SPRN_MAS0,r9
1298 tlbre
1299 mfspr r9,SPRN_MAS1
1300 rlwinm r9,r9,0,2,31 /* Clear MAS1 Valid and IPPROT */
1301 mtspr SPRN_MAS1,r9
1302 tlbwe
1303 isync
1304
1305 cmpwi r4,0
1306 cmpwi cr1,r6,0
1307 cror eq,4*cr1+eq,eq
1308 bne 3f /* offset != 0 && is_boot_cpu */
1309 mtlr r0
1310 blr
1311
1312 /*
1313 * The PAGE_OFFSET will map to a different physical address,
1314 * jump to _start to do another relocation again.
1315 */
13163: mr r3,r5
1317 bl _start
1318
1319/*
1114 * We put a few things here that have to be page-aligned. This stuff 1320 * We put a few things here that have to be page-aligned. This stuff
1115 * goes at the beginning of the data segment, which is page-aligned. 1321 * goes at the beginning of the data segment, which is page-aligned.
1116 */ 1322 */
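Putting the relocation arithmetic from the comments in this file into C form: on the second pass the virtual base is PAGE_OFFSET plus the (sub-1G) distance between kernstart_addr and memstart_addr, while on the first pass the kernel keeps its offset within the 64M page it runs from. A sketch only, reusing the patch's symbol names; the helper itself is invented:

	/* Sketch of the virtual base handed to relocate() in head_fsl_booke.S. */
	static unsigned long reloc_virt_base(int second_reloc, u64 kernstart_addr,
					     u64 memstart_addr, u32 phys_start)
	{
		if (second_reloc)
			/* memstart_addr is known: map PAGE_OFFSET to it. */
			return PAGE_OFFSET + (u32)(kernstart_addr - memstart_addr);

		/* First pass: preserve our offset within the current 64M page. */
		return KERNELBASE + ((phys_start & 0x3ffffff) -
				     (KERNELBASE & 0x3ffffff));
	}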
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index f0b47d1a6b0e..b0a1792279bb 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -28,7 +28,6 @@
28#include <linux/percpu.h> 28#include <linux/percpu.h>
29#include <linux/kernel.h> 29#include <linux/kernel.h>
30#include <linux/sched.h> 30#include <linux/sched.h>
31#include <linux/init.h>
32#include <linux/smp.h> 31#include <linux/smp.h>
33 32
34#include <asm/hw_breakpoint.h> 33#include <asm/hw_breakpoint.h>
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 847e40e62fce..3fdef0f0c67f 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -84,6 +84,7 @@ _GLOBAL(power7_nap)
84 std r9,_MSR(r1) 84 std r9,_MSR(r1)
85 std r1,PACAR1(r13) 85 std r1,PACAR1(r13)
86 86
87_GLOBAL(power7_enter_nap_mode)
87#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE 88#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
88 /* Tell KVM we're napping */ 89 /* Tell KVM we're napping */
89 li r4,KVM_HWTHREAD_IN_NAP 90 li r4,KVM_HWTHREAD_IN_NAP
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index 97a3715ac8bd..b82227e7e21b 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -3,7 +3,6 @@
3 * 3 *
4 * (C) Copyright 2004 Linus Torvalds 4 * (C) Copyright 2004 Linus Torvalds
5 */ 5 */
6#include <linux/init.h>
7#include <linux/pci.h> 6#include <linux/pci.h>
8#include <linux/mm.h> 7#include <linux/mm.h>
9#include <linux/export.h> 8#include <linux/export.h>
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 572bb5b95f35..88e3ec6e1d96 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -251,14 +251,13 @@ again:
251 251
252 if (dev) 252 if (dev)
253 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, 253 boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
254 1 << IOMMU_PAGE_SHIFT); 254 1 << tbl->it_page_shift);
255 else 255 else
256 boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT); 256 boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
257 /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ 257 /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
258 258
259 n = iommu_area_alloc(tbl->it_map, limit, start, npages, 259 n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
260 tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT, 260 boundary_size >> tbl->it_page_shift, align_mask);
261 align_mask);
262 if (n == -1) { 261 if (n == -1) {
263 if (likely(pass == 0)) { 262 if (likely(pass == 0)) {
264 /* First try the pool from the start */ 263 /* First try the pool from the start */
@@ -320,12 +319,12 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
320 return DMA_ERROR_CODE; 319 return DMA_ERROR_CODE;
321 320
322 entry += tbl->it_offset; /* Offset into real TCE table */ 321 entry += tbl->it_offset; /* Offset into real TCE table */
323 ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ 322 ret = entry << tbl->it_page_shift; /* Set the return dma address */
324 323
325 /* Put the TCEs in the HW table */ 324 /* Put the TCEs in the HW table */
326 build_fail = ppc_md.tce_build(tbl, entry, npages, 325 build_fail = ppc_md.tce_build(tbl, entry, npages,
327 (unsigned long)page & IOMMU_PAGE_MASK, 326 (unsigned long)page &
328 direction, attrs); 327 IOMMU_PAGE_MASK(tbl), direction, attrs);
329 328
330 /* ppc_md.tce_build() only returns non-zero for transient errors. 329 /* ppc_md.tce_build() only returns non-zero for transient errors.
331 * Clean up the table bitmap in this case and return 330 * Clean up the table bitmap in this case and return
@@ -352,7 +351,7 @@ static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
352{ 351{
353 unsigned long entry, free_entry; 352 unsigned long entry, free_entry;
354 353
355 entry = dma_addr >> IOMMU_PAGE_SHIFT; 354 entry = dma_addr >> tbl->it_page_shift;
356 free_entry = entry - tbl->it_offset; 355 free_entry = entry - tbl->it_offset;
357 356
358 if (((free_entry + npages) > tbl->it_size) || 357 if (((free_entry + npages) > tbl->it_size) ||
@@ -401,7 +400,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
401 unsigned long flags; 400 unsigned long flags;
402 struct iommu_pool *pool; 401 struct iommu_pool *pool;
403 402
404 entry = dma_addr >> IOMMU_PAGE_SHIFT; 403 entry = dma_addr >> tbl->it_page_shift;
405 free_entry = entry - tbl->it_offset; 404 free_entry = entry - tbl->it_offset;
406 405
407 pool = get_pool(tbl, free_entry); 406 pool = get_pool(tbl, free_entry);
@@ -468,13 +467,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
468 } 467 }
469 /* Allocate iommu entries for that segment */ 468 /* Allocate iommu entries for that segment */
470 vaddr = (unsigned long) sg_virt(s); 469 vaddr = (unsigned long) sg_virt(s);
471 npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE); 470 npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));
472 align = 0; 471 align = 0;
473 if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && slen >= PAGE_SIZE && 472 if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&
474 (vaddr & ~PAGE_MASK) == 0) 473 (vaddr & ~PAGE_MASK) == 0)
475 align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; 474 align = PAGE_SHIFT - tbl->it_page_shift;
476 entry = iommu_range_alloc(dev, tbl, npages, &handle, 475 entry = iommu_range_alloc(dev, tbl, npages, &handle,
477 mask >> IOMMU_PAGE_SHIFT, align); 476 mask >> tbl->it_page_shift, align);
478 477
479 DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); 478 DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
480 479
@@ -489,16 +488,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
489 488
490 /* Convert entry to a dma_addr_t */ 489 /* Convert entry to a dma_addr_t */
491 entry += tbl->it_offset; 490 entry += tbl->it_offset;
492 dma_addr = entry << IOMMU_PAGE_SHIFT; 491 dma_addr = entry << tbl->it_page_shift;
493 dma_addr |= (s->offset & ~IOMMU_PAGE_MASK); 492 dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));
494 493
495 DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", 494 DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
496 npages, entry, dma_addr); 495 npages, entry, dma_addr);
497 496
498 /* Insert into HW table */ 497 /* Insert into HW table */
499 build_fail = ppc_md.tce_build(tbl, entry, npages, 498 build_fail = ppc_md.tce_build(tbl, entry, npages,
500 vaddr & IOMMU_PAGE_MASK, 499 vaddr & IOMMU_PAGE_MASK(tbl),
501 direction, attrs); 500 direction, attrs);
502 if(unlikely(build_fail)) 501 if(unlikely(build_fail))
503 goto failure; 502 goto failure;
504 503
@@ -559,9 +558,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
559 if (s->dma_length != 0) { 558 if (s->dma_length != 0) {
560 unsigned long vaddr, npages; 559 unsigned long vaddr, npages;
561 560
562 vaddr = s->dma_address & IOMMU_PAGE_MASK; 561 vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);
563 npages = iommu_num_pages(s->dma_address, s->dma_length, 562 npages = iommu_num_pages(s->dma_address, s->dma_length,
564 IOMMU_PAGE_SIZE); 563 IOMMU_PAGE_SIZE(tbl));
565 __iommu_free(tbl, vaddr, npages); 564 __iommu_free(tbl, vaddr, npages);
566 s->dma_address = DMA_ERROR_CODE; 565 s->dma_address = DMA_ERROR_CODE;
567 s->dma_length = 0; 566 s->dma_length = 0;
@@ -592,7 +591,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
592 if (sg->dma_length == 0) 591 if (sg->dma_length == 0)
593 break; 592 break;
594 npages = iommu_num_pages(dma_handle, sg->dma_length, 593 npages = iommu_num_pages(dma_handle, sg->dma_length,
595 IOMMU_PAGE_SIZE); 594 IOMMU_PAGE_SIZE(tbl));
596 __iommu_free(tbl, dma_handle, npages); 595 __iommu_free(tbl, dma_handle, npages);
597 sg = sg_next(sg); 596 sg = sg_next(sg);
598 } 597 }
@@ -676,7 +675,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
676 set_bit(0, tbl->it_map); 675 set_bit(0, tbl->it_map);
677 676
678 /* We only split the IOMMU table if we have 1GB or more of space */ 677 /* We only split the IOMMU table if we have 1GB or more of space */
679 if ((tbl->it_size << IOMMU_PAGE_SHIFT) >= (1UL * 1024 * 1024 * 1024)) 678 if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
680 tbl->nr_pools = IOMMU_NR_POOLS; 679 tbl->nr_pools = IOMMU_NR_POOLS;
681 else 680 else
682 tbl->nr_pools = 1; 681 tbl->nr_pools = 1;
@@ -768,16 +767,16 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
768 767
769 vaddr = page_address(page) + offset; 768 vaddr = page_address(page) + offset;
770 uaddr = (unsigned long)vaddr; 769 uaddr = (unsigned long)vaddr;
771 npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE); 770 npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
772 771
773 if (tbl) { 772 if (tbl) {
774 align = 0; 773 align = 0;
775 if (IOMMU_PAGE_SHIFT < PAGE_SHIFT && size >= PAGE_SIZE && 774 if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
776 ((unsigned long)vaddr & ~PAGE_MASK) == 0) 775 ((unsigned long)vaddr & ~PAGE_MASK) == 0)
777 align = PAGE_SHIFT - IOMMU_PAGE_SHIFT; 776 align = PAGE_SHIFT - tbl->it_page_shift;
778 777
779 dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction, 778 dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
780 mask >> IOMMU_PAGE_SHIFT, align, 779 mask >> tbl->it_page_shift, align,
781 attrs); 780 attrs);
782 if (dma_handle == DMA_ERROR_CODE) { 781 if (dma_handle == DMA_ERROR_CODE) {
783 if (printk_ratelimit()) { 782 if (printk_ratelimit()) {
@@ -786,7 +785,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
786 npages); 785 npages);
787 } 786 }
788 } else 787 } else
789 dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); 788 dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl));
790 } 789 }
791 790
792 return dma_handle; 791 return dma_handle;
@@ -801,7 +800,8 @@ void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
801 BUG_ON(direction == DMA_NONE); 800 BUG_ON(direction == DMA_NONE);
802 801
803 if (tbl) { 802 if (tbl) {
804 npages = iommu_num_pages(dma_handle, size, IOMMU_PAGE_SIZE); 803 npages = iommu_num_pages(dma_handle, size,
804 IOMMU_PAGE_SIZE(tbl));
805 iommu_free(tbl, dma_handle, npages); 805 iommu_free(tbl, dma_handle, npages);
806 } 806 }
807} 807}
@@ -845,10 +845,10 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
845 memset(ret, 0, size); 845 memset(ret, 0, size);
846 846
847 /* Set up tces to cover the allocated range */ 847 /* Set up tces to cover the allocated range */
848 nio_pages = size >> IOMMU_PAGE_SHIFT; 848 nio_pages = size >> tbl->it_page_shift;
849 io_order = get_iommu_order(size); 849 io_order = get_iommu_order(size, tbl);
850 mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL, 850 mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
851 mask >> IOMMU_PAGE_SHIFT, io_order, NULL); 851 mask >> tbl->it_page_shift, io_order, NULL);
852 if (mapping == DMA_ERROR_CODE) { 852 if (mapping == DMA_ERROR_CODE) {
853 free_pages((unsigned long)ret, order); 853 free_pages((unsigned long)ret, order);
854 return NULL; 854 return NULL;
@@ -864,7 +864,7 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
864 unsigned int nio_pages; 864 unsigned int nio_pages;
865 865
866 size = PAGE_ALIGN(size); 866 size = PAGE_ALIGN(size);
867 nio_pages = size >> IOMMU_PAGE_SHIFT; 867 nio_pages = size >> tbl->it_page_shift;
868 iommu_free(tbl, dma_handle, nio_pages); 868 iommu_free(tbl, dma_handle, nio_pages);
869 size = PAGE_ALIGN(size); 869 size = PAGE_ALIGN(size);
870 free_pages((unsigned long)vaddr, get_order(size)); 870 free_pages((unsigned long)vaddr, get_order(size));
@@ -935,10 +935,10 @@ int iommu_tce_clear_param_check(struct iommu_table *tbl,
935 if (tce_value) 935 if (tce_value)
936 return -EINVAL; 936 return -EINVAL;
937 937
938 if (ioba & ~IOMMU_PAGE_MASK) 938 if (ioba & ~IOMMU_PAGE_MASK(tbl))
939 return -EINVAL; 939 return -EINVAL;
940 940
941 ioba >>= IOMMU_PAGE_SHIFT; 941 ioba >>= tbl->it_page_shift;
942 if (ioba < tbl->it_offset) 942 if (ioba < tbl->it_offset)
943 return -EINVAL; 943 return -EINVAL;
944 944
@@ -955,13 +955,13 @@ int iommu_tce_put_param_check(struct iommu_table *tbl,
955 if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ))) 955 if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
956 return -EINVAL; 956 return -EINVAL;
957 957
958 if (tce & ~(IOMMU_PAGE_MASK | TCE_PCI_WRITE | TCE_PCI_READ)) 958 if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
959 return -EINVAL; 959 return -EINVAL;
960 960
961 if (ioba & ~IOMMU_PAGE_MASK) 961 if (ioba & ~IOMMU_PAGE_MASK(tbl))
962 return -EINVAL; 962 return -EINVAL;
963 963
964 ioba >>= IOMMU_PAGE_SHIFT; 964 ioba >>= tbl->it_page_shift;
965 if (ioba < tbl->it_offset) 965 if (ioba < tbl->it_offset)
966 return -EINVAL; 966 return -EINVAL;
967 967
@@ -1037,7 +1037,7 @@ int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
1037 1037
1038 /* if (unlikely(ret)) 1038 /* if (unlikely(ret))
1039 pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n", 1039 pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
1040 __func__, hwaddr, entry << IOMMU_PAGE_SHIFT, 1040 __func__, hwaddr, entry << IOMMU_PAGE_SHIFT(tbl),
1041 hwaddr, ret); */ 1041 hwaddr, ret); */
1042 1042
1043 return ret; 1043 return ret;
@@ -1049,14 +1049,14 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
1049{ 1049{
1050 int ret; 1050 int ret;
1051 struct page *page = NULL; 1051 struct page *page = NULL;
1052 unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK & ~PAGE_MASK; 1052 unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
1053 enum dma_data_direction direction = iommu_tce_direction(tce); 1053 enum dma_data_direction direction = iommu_tce_direction(tce);
1054 1054
1055 ret = get_user_pages_fast(tce & PAGE_MASK, 1, 1055 ret = get_user_pages_fast(tce & PAGE_MASK, 1,
1056 direction != DMA_TO_DEVICE, &page); 1056 direction != DMA_TO_DEVICE, &page);
1057 if (unlikely(ret != 1)) { 1057 if (unlikely(ret != 1)) {
1058 /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n", 1058 /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
1059 tce, entry << IOMMU_PAGE_SHIFT, ret); */ 1059 tce, entry << IOMMU_PAGE_SHIFT(tbl), ret); */
1060 return -EFAULT; 1060 return -EFAULT;
1061 } 1061 }
1062 hwaddr = (unsigned long) page_address(page) + offset; 1062 hwaddr = (unsigned long) page_address(page) + offset;
@@ -1067,7 +1067,7 @@ int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
1067 1067
1068 if (ret < 0) 1068 if (ret < 0)
1069 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n", 1069 pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
1070 __func__, entry << IOMMU_PAGE_SHIFT, tce, ret); 1070 __func__, entry << tbl->it_page_shift, tce, ret);
1071 1071
1072 return ret; 1072 return ret;
1073} 1073}
@@ -1088,6 +1088,14 @@ int iommu_take_ownership(struct iommu_table *tbl)
1088 memset(tbl->it_map, 0xff, sz); 1088 memset(tbl->it_map, 0xff, sz);
1089 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size); 1089 iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
1090 1090
1091 /*
1092 * Disable iommu bypass, otherwise the user can DMA to all of
1093 * our physical memory via the bypass window instead of just
 1094 * the pages that have been explicitly mapped into the iommu
1095 */
1096 if (tbl->set_bypass)
1097 tbl->set_bypass(tbl, false);
1098
1091 return 0; 1099 return 0;
1092} 1100}
1093EXPORT_SYMBOL_GPL(iommu_take_ownership); 1101EXPORT_SYMBOL_GPL(iommu_take_ownership);
@@ -1102,10 +1110,14 @@ void iommu_release_ownership(struct iommu_table *tbl)
1102 /* Restore bit#0 set by iommu_init_table() */ 1110 /* Restore bit#0 set by iommu_init_table() */
1103 if (tbl->it_offset == 0) 1111 if (tbl->it_offset == 0)
1104 set_bit(0, tbl->it_map); 1112 set_bit(0, tbl->it_map);
1113
1114 /* The kernel owns the device now, we can restore the iommu bypass */
1115 if (tbl->set_bypass)
1116 tbl->set_bypass(tbl, true);
1105} 1117}
1106EXPORT_SYMBOL_GPL(iommu_release_ownership); 1118EXPORT_SYMBOL_GPL(iommu_release_ownership);
1107 1119
1108static int iommu_add_device(struct device *dev) 1120int iommu_add_device(struct device *dev)
1109{ 1121{
1110 struct iommu_table *tbl; 1122 struct iommu_table *tbl;
1111 int ret = 0; 1123 int ret = 0;
@@ -1127,6 +1139,12 @@ static int iommu_add_device(struct device *dev)
1127 pr_debug("iommu_tce: adding %s to iommu group %d\n", 1139 pr_debug("iommu_tce: adding %s to iommu group %d\n",
1128 dev_name(dev), iommu_group_id(tbl->it_group)); 1140 dev_name(dev), iommu_group_id(tbl->it_group));
1129 1141
1142 if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
1143 pr_err("iommu_tce: unsupported iommu page size.");
1144 pr_err("%s has not been added\n", dev_name(dev));
1145 return -EINVAL;
1146 }
1147
1130 ret = iommu_group_add_device(tbl->it_group, dev); 1148 ret = iommu_group_add_device(tbl->it_group, dev);
1131 if (ret < 0) 1149 if (ret < 0)
1132 pr_err("iommu_tce: %s has not been added, ret=%d\n", 1150 pr_err("iommu_tce: %s has not been added, ret=%d\n",
@@ -1134,52 +1152,23 @@ static int iommu_add_device(struct device *dev)
1134 1152
1135 return ret; 1153 return ret;
1136} 1154}
1155EXPORT_SYMBOL_GPL(iommu_add_device);
1137 1156
1138static void iommu_del_device(struct device *dev) 1157void iommu_del_device(struct device *dev)
1139{
1140 iommu_group_remove_device(dev);
1141}
1142
1143static int iommu_bus_notifier(struct notifier_block *nb,
1144 unsigned long action, void *data)
1145{ 1158{
1146 struct device *dev = data; 1159 /*
1147 1160 * Some devices might not have IOMMU table and group
1148 switch (action) { 1161 * and we needn't detach them from the associated
1149 case BUS_NOTIFY_ADD_DEVICE: 1162 * IOMMU groups
1150 return iommu_add_device(dev); 1163 */
1151 case BUS_NOTIFY_DEL_DEVICE: 1164 if (!dev->iommu_group) {
1152 iommu_del_device(dev); 1165 pr_debug("iommu_tce: skipping device %s with no tbl\n",
1153 return 0; 1166 dev_name(dev));
1154 default: 1167 return;
1155 return 0;
1156 } 1168 }
1157}
1158
1159static struct notifier_block tce_iommu_bus_nb = {
1160 .notifier_call = iommu_bus_notifier,
1161};
1162
1163static int __init tce_iommu_init(void)
1164{
1165 struct pci_dev *pdev = NULL;
1166
1167 BUILD_BUG_ON(PAGE_SIZE < IOMMU_PAGE_SIZE);
1168
1169 for_each_pci_dev(pdev)
1170 iommu_add_device(&pdev->dev);
1171
1172 bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
1173 return 0;
1174}
1175
1176subsys_initcall_sync(tce_iommu_init);
1177
1178#else
1179 1169
1180void iommu_register_group(struct iommu_table *tbl, 1170 iommu_group_remove_device(dev);
1181 int pci_domain_number, unsigned long pe_num)
1182{
1183} 1171}
1172EXPORT_SYMBOL_GPL(iommu_del_device);
1184 1173
1185#endif /* CONFIG_IOMMU_API */ 1174#endif /* CONFIG_IOMMU_API */
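All of the IOMMU_PAGE_SIZE()/IOMMU_PAGE_MASK() call sites above now take the table and derive the geometry from it_page_shift. The real definitions live in asm/iommu.h and may differ slightly; roughly they amount to:

	/* Per-table IOMMU page geometry, as assumed by the call sites above (sketch). */
	#define IOMMU_PAGE_SIZE(tbl)   (1UL << (tbl)->it_page_shift)
	#define IOMMU_PAGE_MASK(tbl)   (~(IOMMU_PAGE_SIZE(tbl) - 1))
	#define IOMMU_PAGE_ALIGN(addr, tbl)  ALIGN((addr), IOMMU_PAGE_SIZE(tbl))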
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ba0165615215..1d0848bba049 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -354,8 +354,13 @@ int arch_show_interrupts(struct seq_file *p, int prec)
354 354
355 seq_printf(p, "%*s: ", prec, "LOC"); 355 seq_printf(p, "%*s: ", prec, "LOC");
356 for_each_online_cpu(j) 356 for_each_online_cpu(j)
357 seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs); 357 seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event);
358 seq_printf(p, " Local timer interrupts\n"); 358 seq_printf(p, " Local timer interrupts for timer event device\n");
359
360 seq_printf(p, "%*s: ", prec, "LOC");
361 for_each_online_cpu(j)
362 seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others);
363 seq_printf(p, " Local timer interrupts for others\n");
359 364
360 seq_printf(p, "%*s: ", prec, "SPU"); 365 seq_printf(p, "%*s: ", prec, "SPU");
361 for_each_online_cpu(j) 366 for_each_online_cpu(j)
@@ -389,11 +394,12 @@ int arch_show_interrupts(struct seq_file *p, int prec)
389 */ 394 */
390u64 arch_irq_stat_cpu(unsigned int cpu) 395u64 arch_irq_stat_cpu(unsigned int cpu)
391{ 396{
392 u64 sum = per_cpu(irq_stat, cpu).timer_irqs; 397 u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event;
393 398
394 sum += per_cpu(irq_stat, cpu).pmu_irqs; 399 sum += per_cpu(irq_stat, cpu).pmu_irqs;
395 sum += per_cpu(irq_stat, cpu).mce_exceptions; 400 sum += per_cpu(irq_stat, cpu).mce_exceptions;
396 sum += per_cpu(irq_stat, cpu).spurious_irqs; 401 sum += per_cpu(irq_stat, cpu).spurious_irqs;
402 sum += per_cpu(irq_stat, cpu).timer_irqs_others;
397#ifdef CONFIG_PPC_DOORBELL 403#ifdef CONFIG_PPC_DOORBELL
398 sum += per_cpu(irq_stat, cpu).doorbell_irqs; 404 sum += per_cpu(irq_stat, cpu).doorbell_irqs;
399#endif 405#endif
@@ -553,8 +559,13 @@ void exc_lvl_ctx_init(void)
553#ifdef CONFIG_PPC64 559#ifdef CONFIG_PPC64
554 cpu_nr = i; 560 cpu_nr = i;
555#else 561#else
562#ifdef CONFIG_SMP
556 cpu_nr = get_hard_smp_processor_id(i); 563 cpu_nr = get_hard_smp_processor_id(i);
564#else
565 cpu_nr = 0;
566#endif
557#endif 567#endif
568
558 memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE); 569 memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
559 tp = critirq_ctx[cpu_nr]; 570 tp = critirq_ctx[cpu_nr];
560 tp->cpu = cpu_nr; 571 tp->cpu = cpu_nr;
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 83e89d310734..8504657379f1 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -15,7 +15,6 @@
15 */ 15 */
16 16
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/init.h>
19#include <linux/kgdb.h> 18#include <linux/kgdb.h>
20#include <linux/smp.h> 19#include <linux/smp.h>
21#include <linux/signal.h> 20#include <linux/signal.h>
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index db28032e320e..6a0175297b0d 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -413,13 +413,13 @@ static void kvm_map_magic_page(void *data)
413{ 413{
414 u32 *features = data; 414 u32 *features = data;
415 415
416 ulong in[8]; 416 ulong in[8] = {0};
417 ulong out[8]; 417 ulong out[8];
418 418
419 in[0] = KVM_MAGIC_PAGE; 419 in[0] = KVM_MAGIC_PAGE;
420 in[1] = KVM_MAGIC_PAGE; 420 in[1] = KVM_MAGIC_PAGE;
421 421
422 kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE)); 422 epapr_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
423 423
424 *features = out[0]; 424 *features = out[0];
425} 425}
@@ -711,43 +711,6 @@ static void kvm_use_magic_page(void)
711 kvm_patching_worked ? "worked" : "failed"); 711 kvm_patching_worked ? "worked" : "failed");
712} 712}
713 713
714unsigned long kvm_hypercall(unsigned long *in,
715 unsigned long *out,
716 unsigned long nr)
717{
718 unsigned long register r0 asm("r0");
719 unsigned long register r3 asm("r3") = in[0];
720 unsigned long register r4 asm("r4") = in[1];
721 unsigned long register r5 asm("r5") = in[2];
722 unsigned long register r6 asm("r6") = in[3];
723 unsigned long register r7 asm("r7") = in[4];
724 unsigned long register r8 asm("r8") = in[5];
725 unsigned long register r9 asm("r9") = in[6];
726 unsigned long register r10 asm("r10") = in[7];
727 unsigned long register r11 asm("r11") = nr;
728 unsigned long register r12 asm("r12");
729
730 asm volatile("bl epapr_hypercall_start"
731 : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
732 "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
733 "=r"(r12)
734 : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8),
735 "r"(r9), "r"(r10), "r"(r11)
736 : "memory", "cc", "xer", "ctr", "lr");
737
738 out[0] = r4;
739 out[1] = r5;
740 out[2] = r6;
741 out[3] = r7;
742 out[4] = r8;
743 out[5] = r9;
744 out[6] = r10;
745 out[7] = r11;
746
747 return r3;
748}
749EXPORT_SYMBOL_GPL(kvm_hypercall);
750
751static __init void kvm_free_tmp(void) 714static __init void kvm_free_tmp(void)
752{ 715{
753 free_reserved_area(&kvm_tmp[kvm_tmp_index], 716 free_reserved_area(&kvm_tmp[kvm_tmp_index],
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 75d4f7340da8..015ae55c1868 100644
--- a/arch/powerpc/kernel/machine_kexec.c
+++ b/arch/powerpc/kernel/machine_kexec.c
@@ -196,7 +196,9 @@ int overlaps_crashkernel(unsigned long start, unsigned long size)
196 196
197/* Values we need to export to the second kernel via the device tree. */ 197/* Values we need to export to the second kernel via the device tree. */
198static phys_addr_t kernel_end; 198static phys_addr_t kernel_end;
199static phys_addr_t crashk_base;
199static phys_addr_t crashk_size; 200static phys_addr_t crashk_size;
201static unsigned long long mem_limit;
200 202
201static struct property kernel_end_prop = { 203static struct property kernel_end_prop = {
202 .name = "linux,kernel-end", 204 .name = "linux,kernel-end",
@@ -207,7 +209,7 @@ static struct property kernel_end_prop = {
207static struct property crashk_base_prop = { 209static struct property crashk_base_prop = {
208 .name = "linux,crashkernel-base", 210 .name = "linux,crashkernel-base",
209 .length = sizeof(phys_addr_t), 211 .length = sizeof(phys_addr_t),
210 .value = &crashk_res.start, 212 .value = &crashk_base
211}; 213};
212 214
213static struct property crashk_size_prop = { 215static struct property crashk_size_prop = {
@@ -219,9 +221,11 @@ static struct property crashk_size_prop = {
219static struct property memory_limit_prop = { 221static struct property memory_limit_prop = {
220 .name = "linux,memory-limit", 222 .name = "linux,memory-limit",
221 .length = sizeof(unsigned long long), 223 .length = sizeof(unsigned long long),
222 .value = &memory_limit, 224 .value = &mem_limit,
223}; 225};
224 226
227#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG)
228
225static void __init export_crashk_values(struct device_node *node) 229static void __init export_crashk_values(struct device_node *node)
226{ 230{
227 struct property *prop; 231 struct property *prop;
@@ -237,8 +241,9 @@ static void __init export_crashk_values(struct device_node *node)
237 of_remove_property(node, prop); 241 of_remove_property(node, prop);
238 242
239 if (crashk_res.start != 0) { 243 if (crashk_res.start != 0) {
244 crashk_base = cpu_to_be_ulong(crashk_res.start),
240 of_add_property(node, &crashk_base_prop); 245 of_add_property(node, &crashk_base_prop);
241 crashk_size = resource_size(&crashk_res); 246 crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
242 of_add_property(node, &crashk_size_prop); 247 of_add_property(node, &crashk_size_prop);
243 } 248 }
244 249
@@ -246,6 +251,7 @@ static void __init export_crashk_values(struct device_node *node)
246 * memory_limit is required by the kexec-tools to limit the 251 * memory_limit is required by the kexec-tools to limit the
247 * crash regions to the actual memory used. 252 * crash regions to the actual memory used.
248 */ 253 */
254 mem_limit = cpu_to_be_ulong(memory_limit);
249 of_update_property(node, &memory_limit_prop); 255 of_update_property(node, &memory_limit_prop);
250} 256}
251 257
@@ -264,7 +270,7 @@ static int __init kexec_setup(void)
264 of_remove_property(node, prop); 270 of_remove_property(node, prop);
265 271
266 /* information needed by userspace when using default_machine_kexec */ 272 /* information needed by userspace when using default_machine_kexec */
267 kernel_end = __pa(_end); 273 kernel_end = cpu_to_be_ulong(__pa(_end));
268 of_add_property(node, &kernel_end_prop); 274 of_add_property(node, &kernel_end_prop);
269 275
270 export_crashk_values(node); 276 export_crashk_values(node);
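The cpu_to_be_ulong() helper above pastes BITS_PER_LONG onto cpu_to_be, so each exported property is byte-swapped to big-endian on little-endian kernels before kexec-tools reads it from the device tree. An illustrative expansion (not additional code in the patch):

	/* cpu_to_be_ulong(x) -> cpu_to_be64(x) on 64-bit, cpu_to_be32(x) on 32-bit. */
	#define cpu_to_be_ulong	__PASTE(cpu_to_be, BITS_PER_LONG)

	kernel_end = cpu_to_be_ulong(__pa(_end));	/* stored big-endian in the DT */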
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index be4e6d648f60..59d229a2a3e0 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -369,6 +369,7 @@ void default_machine_kexec(struct kimage *image)
369 369
370/* Values we need to export to the second kernel via the device tree. */ 370/* Values we need to export to the second kernel via the device tree. */
371static unsigned long htab_base; 371static unsigned long htab_base;
372static unsigned long htab_size;
372 373
373static struct property htab_base_prop = { 374static struct property htab_base_prop = {
374 .name = "linux,htab-base", 375 .name = "linux,htab-base",
@@ -379,7 +380,7 @@ static struct property htab_base_prop = {
379static struct property htab_size_prop = { 380static struct property htab_size_prop = {
380 .name = "linux,htab-size", 381 .name = "linux,htab-size",
381 .length = sizeof(unsigned long), 382 .length = sizeof(unsigned long),
382 .value = &htab_size_bytes, 383 .value = &htab_size,
383}; 384};
384 385
385static int __init export_htab_values(void) 386static int __init export_htab_values(void)
@@ -403,8 +404,9 @@ static int __init export_htab_values(void)
403 if (prop) 404 if (prop)
404 of_remove_property(node, prop); 405 of_remove_property(node, prop);
405 406
406 htab_base = __pa(htab_address); 407 htab_base = cpu_to_be64(__pa(htab_address));
407 of_add_property(node, &htab_base_prop); 408 of_add_property(node, &htab_base_prop);
409 htab_size = cpu_to_be64(htab_size_bytes);
408 of_add_property(node, &htab_size_prop); 410 of_add_property(node, &htab_size_prop);
409 411
410 of_node_put(node); 412 of_node_put(node);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 000000000000..cadef7e64e42
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,352 @@
1/*
2 * Machine check exception handling.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright 2013 IBM Corporation
19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20 */
21
22#undef DEBUG
23#define pr_fmt(fmt) "mce: " fmt
24
25#include <linux/types.h>
26#include <linux/ptrace.h>
27#include <linux/percpu.h>
28#include <linux/export.h>
29#include <linux/irq_work.h>
30#include <asm/mce.h>
31
32static DEFINE_PER_CPU(int, mce_nest_count);
33static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
34
35/* Queue for delayed MCE events. */
36static DEFINE_PER_CPU(int, mce_queue_count);
37static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
38
39static void machine_check_process_queued_event(struct irq_work *work);
40struct irq_work mce_event_process_work = {
41 .func = machine_check_process_queued_event,
42};
43
44static void mce_set_error_info(struct machine_check_event *mce,
45 struct mce_error_info *mce_err)
46{
47 mce->error_type = mce_err->error_type;
48 switch (mce_err->error_type) {
49 case MCE_ERROR_TYPE_UE:
50 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
51 break;
52 case MCE_ERROR_TYPE_SLB:
53 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
54 break;
55 case MCE_ERROR_TYPE_ERAT:
56 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
57 break;
58 case MCE_ERROR_TYPE_TLB:
59 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
60 break;
61 case MCE_ERROR_TYPE_UNKNOWN:
62 default:
63 break;
64 }
65}
66
67/*
68 * Decode and save high level MCE information into per cpu buffer which
 69 * is an array of machine_check_event structures.
70 */
71void save_mce_event(struct pt_regs *regs, long handled,
72 struct mce_error_info *mce_err,
73 uint64_t addr)
74{
75 uint64_t srr1;
76 int index = __get_cpu_var(mce_nest_count)++;
77 struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
78
79 /*
80 * Return if we don't have enough space to log mce event.
81 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
82 * the check below will stop buffer overrun.
83 */
84 if (index >= MAX_MC_EVT)
85 return;
86
87 /* Populate generic machine check info */
88 mce->version = MCE_V1;
89 mce->srr0 = regs->nip;
90 mce->srr1 = regs->msr;
91 mce->gpr3 = regs->gpr[3];
92 mce->in_use = 1;
93
94 mce->initiator = MCE_INITIATOR_CPU;
95 if (handled)
96 mce->disposition = MCE_DISPOSITION_RECOVERED;
97 else
98 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
99 mce->severity = MCE_SEV_ERROR_SYNC;
100
101 srr1 = regs->msr;
102
103 /*
104 * Populate the mce error_type and type-specific error_type.
105 */
106 mce_set_error_info(mce, mce_err);
107
108 if (!addr)
109 return;
110
111 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
112 mce->u.tlb_error.effective_address_provided = true;
113 mce->u.tlb_error.effective_address = addr;
114 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
115 mce->u.slb_error.effective_address_provided = true;
116 mce->u.slb_error.effective_address = addr;
117 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
118 mce->u.erat_error.effective_address_provided = true;
119 mce->u.erat_error.effective_address = addr;
120 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
121 mce->u.ue_error.effective_address_provided = true;
122 mce->u.ue_error.effective_address = addr;
123 }
124 return;
125}
126
127/*
128 * get_mce_event:
129 * mce Pointer to machine_check_event structure to be filled.
130 * release Flag to indicate whether to free the event slot or not.
131 * 0 <= do not release the mce event. Caller will invoke
132 * release_mce_event() once event has been consumed.
133 * 1 <= release the slot.
134 *
135 * return 1 = success
136 * 0 = failure
137 *
138 * get_mce_event() will be called by platform specific machine check
139 * handle routine and in KVM.
140 * When we call get_mce_event(), we are still in interrupt context and
 141 * preemption cannot occur until the ret_from_except() routine
142 * is called.
143 */
144int get_mce_event(struct machine_check_event *mce, bool release)
145{
146 int index = __get_cpu_var(mce_nest_count) - 1;
147 struct machine_check_event *mc_evt;
148 int ret = 0;
149
150 /* Sanity check */
151 if (index < 0)
152 return ret;
153
154 /* Check if we have MCE info to process. */
155 if (index < MAX_MC_EVT) {
156 mc_evt = &__get_cpu_var(mce_event[index]);
157 /* Copy the event structure and release the original */
158 if (mce)
159 *mce = *mc_evt;
160 if (release)
161 mc_evt->in_use = 0;
162 ret = 1;
163 }
164 /* Decrement the count to free the slot. */
165 if (release)
166 __get_cpu_var(mce_nest_count)--;
167
168 return ret;
169}
170
171void release_mce_event(void)
172{
173 get_mce_event(NULL, true);
174}
175
176/*
177 * Queue up the MCE event which then can be handled later.
178 */
179void machine_check_queue_event(void)
180{
181 int index;
182 struct machine_check_event evt;
183
184 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
185 return;
186
187 index = __get_cpu_var(mce_queue_count)++;
188 /* If queue is full, just return for now. */
189 if (index >= MAX_MC_EVT) {
190 __get_cpu_var(mce_queue_count)--;
191 return;
192 }
193 __get_cpu_var(mce_event_queue[index]) = evt;
194
195 /* Queue irq work to process this event later. */
196 irq_work_queue(&mce_event_process_work);
197}
198
199/*
 200 * Process pending MCE events from the MCE event queue. This function will be
201 * called during syscall exit.
202 */
203static void machine_check_process_queued_event(struct irq_work *work)
204{
205 int index;
206
207 /*
208 * For now just print it to console.
209 * TODO: log this error event to FSP or nvram.
210 */
211 while (__get_cpu_var(mce_queue_count) > 0) {
212 index = __get_cpu_var(mce_queue_count) - 1;
213 machine_check_print_event_info(
214 &__get_cpu_var(mce_event_queue[index]));
215 __get_cpu_var(mce_queue_count)--;
216 }
217}
218
219void machine_check_print_event_info(struct machine_check_event *evt)
220{
221 const char *level, *sevstr, *subtype;
222 static const char *mc_ue_types[] = {
223 "Indeterminate",
224 "Instruction fetch",
225 "Page table walk ifetch",
226 "Load/Store",
227 "Page table walk Load/Store",
228 };
229 static const char *mc_slb_types[] = {
230 "Indeterminate",
231 "Parity",
232 "Multihit",
233 };
234 static const char *mc_erat_types[] = {
235 "Indeterminate",
236 "Parity",
237 "Multihit",
238 };
239 static const char *mc_tlb_types[] = {
240 "Indeterminate",
241 "Parity",
242 "Multihit",
243 };
244
245 /* Print things out */
246 if (evt->version != MCE_V1) {
247 pr_err("Machine Check Exception, Unknown event version %d !\n",
248 evt->version);
249 return;
250 }
251 switch (evt->severity) {
252 case MCE_SEV_NO_ERROR:
253 level = KERN_INFO;
254 sevstr = "Harmless";
255 break;
256 case MCE_SEV_WARNING:
257 level = KERN_WARNING;
258 sevstr = "";
259 break;
260 case MCE_SEV_ERROR_SYNC:
261 level = KERN_ERR;
262 sevstr = "Severe";
263 break;
264 case MCE_SEV_FATAL:
265 default:
266 level = KERN_ERR;
267 sevstr = "Fatal";
268 break;
269 }
270
271 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
272 evt->disposition == MCE_DISPOSITION_RECOVERED ?
273 "Recovered" : "[Not recovered");
274 printk("%s Initiator: %s\n", level,
275 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
276 switch (evt->error_type) {
277 case MCE_ERROR_TYPE_UE:
278 subtype = evt->u.ue_error.ue_error_type <
279 ARRAY_SIZE(mc_ue_types) ?
280 mc_ue_types[evt->u.ue_error.ue_error_type]
281 : "Unknown";
282 printk("%s Error type: UE [%s]\n", level, subtype);
283 if (evt->u.ue_error.effective_address_provided)
284 printk("%s Effective address: %016llx\n",
285 level, evt->u.ue_error.effective_address);
286 if (evt->u.ue_error.physical_address_provided)
287 printk("%s Physial address: %016llx\n",
288 level, evt->u.ue_error.physical_address);
289 break;
290 case MCE_ERROR_TYPE_SLB:
291 subtype = evt->u.slb_error.slb_error_type <
292 ARRAY_SIZE(mc_slb_types) ?
293 mc_slb_types[evt->u.slb_error.slb_error_type]
294 : "Unknown";
295 printk("%s Error type: SLB [%s]\n", level, subtype);
296 if (evt->u.slb_error.effective_address_provided)
297 printk("%s Effective address: %016llx\n",
298 level, evt->u.slb_error.effective_address);
299 break;
300 case MCE_ERROR_TYPE_ERAT:
301 subtype = evt->u.erat_error.erat_error_type <
302 ARRAY_SIZE(mc_erat_types) ?
303 mc_erat_types[evt->u.erat_error.erat_error_type]
304 : "Unknown";
305 printk("%s Error type: ERAT [%s]\n", level, subtype);
306 if (evt->u.erat_error.effective_address_provided)
307 printk("%s Effective address: %016llx\n",
308 level, evt->u.erat_error.effective_address);
309 break;
310 case MCE_ERROR_TYPE_TLB:
311 subtype = evt->u.tlb_error.tlb_error_type <
312 ARRAY_SIZE(mc_tlb_types) ?
313 mc_tlb_types[evt->u.tlb_error.tlb_error_type]
314 : "Unknown";
315 printk("%s Error type: TLB [%s]\n", level, subtype);
316 if (evt->u.tlb_error.effective_address_provided)
317 printk("%s Effective address: %016llx\n",
318 level, evt->u.tlb_error.effective_address);
319 break;
320 default:
321 case MCE_ERROR_TYPE_UNKNOWN:
322 printk("%s Error type: Unknown\n", level);
323 break;
324 }
325}
326
327uint64_t get_mce_fault_addr(struct machine_check_event *evt)
328{
329 switch (evt->error_type) {
330 case MCE_ERROR_TYPE_UE:
331 if (evt->u.ue_error.effective_address_provided)
332 return evt->u.ue_error.effective_address;
333 break;
334 case MCE_ERROR_TYPE_SLB:
335 if (evt->u.slb_error.effective_address_provided)
336 return evt->u.slb_error.effective_address;
337 break;
338 case MCE_ERROR_TYPE_ERAT:
339 if (evt->u.erat_error.effective_address_provided)
340 return evt->u.erat_error.effective_address;
341 break;
342 case MCE_ERROR_TYPE_TLB:
343 if (evt->u.tlb_error.effective_address_provided)
344 return evt->u.tlb_error.effective_address;
345 break;
346 default:
347 case MCE_ERROR_TYPE_UNKNOWN:
348 break;
349 }
350 return 0;
351}
352EXPORT_SYMBOL(get_mce_fault_addr);
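The new mce.c above queues events from the restricted real-mode handler and defers the printing/logging to irq_work. As a reading aid only, here is a minimal, self-contained sketch of that queue-and-defer pattern; the demo_* names are hypothetical and are not part of the patch, which uses machine_check_queue_event() and machine_check_process_queued_event() as shown in the hunks above.

#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/types.h>

#define DEMO_MAX_EVT	10

static DEFINE_PER_CPU(int, demo_queue_count);
static DEFINE_PER_CPU(u64[DEMO_MAX_EVT], demo_event_queue);

/* Runs later, from a safe interrupt context: drain the per-CPU queue. */
static void demo_process_queued(struct irq_work *work)
{
	while (__get_cpu_var(demo_queue_count) > 0) {
		int index = __get_cpu_var(demo_queue_count) - 1;

		pr_err("deferred event: %016llx\n",
		       __get_cpu_var(demo_event_queue[index]));
		__get_cpu_var(demo_queue_count)--;
	}
}

static struct irq_work demo_work = {
	.func	= demo_process_queued,
};

/* Called from the restricted (real-mode) context: stash the event, defer the rest. */
static void demo_queue_event(u64 evt)
{
	int index = __get_cpu_var(demo_queue_count)++;

	if (index >= DEMO_MAX_EVT) {
		/* Queue full: drop the event for now, as the patch does. */
		__get_cpu_var(demo_queue_count)--;
		return;
	}
	__get_cpu_var(demo_event_queue[index]) = evt;
	irq_work_queue(&demo_work);
}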
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
new file mode 100644
index 000000000000..27c93f41166f
--- /dev/null
+++ b/arch/powerpc/kernel/mce_power.c
@@ -0,0 +1,284 @@
1/*
2 * Machine check exception handling CPU-side for power7 and power8
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright 2013 IBM Corporation
19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20 */
21
22#undef DEBUG
23#define pr_fmt(fmt) "mce_power: " fmt
24
25#include <linux/types.h>
26#include <linux/ptrace.h>
27#include <asm/mmu.h>
28#include <asm/mce.h>
29
30/* flush SLBs and reload */
31static void flush_and_reload_slb(void)
32{
33 struct slb_shadow *slb;
34 unsigned long i, n;
35
36 /* Invalidate all SLBs */
37 asm volatile("slbmte %0,%0; slbia" : : "r" (0));
38
39#ifdef CONFIG_KVM_BOOK3S_HANDLER
40 /*
41 * If machine check is hit when in guest or in transition, we will
42 * only flush the SLBs and continue.
43 */
44 if (get_paca()->kvm_hstate.in_guest)
45 return;
46#endif
47
48 /* For host kernel, reload the SLBs from shadow SLB buffer. */
49 slb = get_slb_shadow();
50 if (!slb)
51 return;
52
53 n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
54
55 /* Load up the SLB entries from shadow SLB */
56 for (i = 0; i < n; i++) {
57 unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
58 unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
59
60 rb = (rb & ~0xFFFul) | i;
61 asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
62 }
63}
64
65static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
66{
67 long handled = 1;
68
69 /*
70 * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
71 * reset the error bits whenever we handle them so that at the end
72 * we can check whether we handled all of them or not.
73 */
74 if (dsisr & slb_error_bits) {
75 flush_and_reload_slb();
76 /* reset error bits */
77 dsisr &= ~(slb_error_bits);
78 }
79 if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
80 if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
81 cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
82 /* reset error bits */
83 dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
84 }
85 /* Any other errors we don't understand? */
86 if (dsisr & 0xffffffffUL)
87 handled = 0;
88
89 return handled;
90}
91
92static long mce_handle_derror_p7(uint64_t dsisr)
93{
94 return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
95}
96
97static long mce_handle_common_ierror(uint64_t srr1)
98{
99 long handled = 0;
100
101 switch (P7_SRR1_MC_IFETCH(srr1)) {
102 case 0:
103 break;
104 case P7_SRR1_MC_IFETCH_SLB_PARITY:
105 case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
106 /* flush and reload SLBs for SLB errors. */
107 flush_and_reload_slb();
108 handled = 1;
109 break;
110 case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
111 if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
112 cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
113 handled = 1;
114 }
115 break;
116 default:
117 break;
118 }
119
120 return handled;
121}
122
123static long mce_handle_ierror_p7(uint64_t srr1)
124{
125 long handled = 0;
126
127 handled = mce_handle_common_ierror(srr1);
128
129 if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
130 flush_and_reload_slb();
131 handled = 1;
132 }
133 return handled;
134}
135
136static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
137{
138 switch (P7_SRR1_MC_IFETCH(srr1)) {
139 case P7_SRR1_MC_IFETCH_SLB_PARITY:
140 mce_err->error_type = MCE_ERROR_TYPE_SLB;
141 mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
142 break;
143 case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
144 mce_err->error_type = MCE_ERROR_TYPE_SLB;
145 mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
146 break;
147 case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
148 mce_err->error_type = MCE_ERROR_TYPE_TLB;
149 mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
150 break;
151 case P7_SRR1_MC_IFETCH_UE:
152 case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
153 mce_err->error_type = MCE_ERROR_TYPE_UE;
154 mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
155 break;
156 case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
157 mce_err->error_type = MCE_ERROR_TYPE_UE;
158 mce_err->u.ue_error_type =
159 MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
160 break;
161 }
162}
163
164static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
165{
166 mce_get_common_ierror(mce_err, srr1);
167 if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
168 mce_err->error_type = MCE_ERROR_TYPE_SLB;
169 mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
170 }
171}
172
173static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
174{
175 if (dsisr & P7_DSISR_MC_UE) {
176 mce_err->error_type = MCE_ERROR_TYPE_UE;
177 mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
178 } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
179 mce_err->error_type = MCE_ERROR_TYPE_UE;
180 mce_err->u.ue_error_type =
181 MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
182 } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
183 mce_err->error_type = MCE_ERROR_TYPE_ERAT;
184 mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
185 } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
186 mce_err->error_type = MCE_ERROR_TYPE_SLB;
187 mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
188 } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
189 mce_err->error_type = MCE_ERROR_TYPE_SLB;
190 mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
191 } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
192 mce_err->error_type = MCE_ERROR_TYPE_TLB;
193 mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
194 } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
195 mce_err->error_type = MCE_ERROR_TYPE_SLB;
196 mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
197 }
198}
199
200long __machine_check_early_realmode_p7(struct pt_regs *regs)
201{
202 uint64_t srr1, addr;
203 long handled = 1;
204 struct mce_error_info mce_error_info = { 0 };
205
206 srr1 = regs->msr;
207
208 /*
209 * Handle memory errors depending on whether this was a load/store or
210 * an ifetch exception. Also, populate the mce error_type and
211 * type-specific error_type from either SRR1 or DSISR, depending on
212 * whether this was a load/store or an ifetch exception.
213 */
214 if (P7_SRR1_MC_LOADSTORE(srr1)) {
215 handled = mce_handle_derror_p7(regs->dsisr);
216 mce_get_derror_p7(&mce_error_info, regs->dsisr);
217 addr = regs->dar;
218 } else {
219 handled = mce_handle_ierror_p7(srr1);
220 mce_get_ierror_p7(&mce_error_info, srr1);
221 addr = regs->nip;
222 }
223
224 save_mce_event(regs, handled, &mce_error_info, addr);
225 return handled;
226}
227
228static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
229{
230 mce_get_common_ierror(mce_err, srr1);
231 if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
232 mce_err->error_type = MCE_ERROR_TYPE_ERAT;
233 mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
234 }
235}
236
237static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
238{
239 mce_get_derror_p7(mce_err, dsisr);
240 if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
241 mce_err->error_type = MCE_ERROR_TYPE_ERAT;
242 mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
243 }
244}
245
246static long mce_handle_ierror_p8(uint64_t srr1)
247{
248 long handled = 0;
249
250 handled = mce_handle_common_ierror(srr1);
251
252 if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
253 flush_and_reload_slb();
254 handled = 1;
255 }
256 return handled;
257}
258
259static long mce_handle_derror_p8(uint64_t dsisr)
260{
261 return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
262}
263
264long __machine_check_early_realmode_p8(struct pt_regs *regs)
265{
266 uint64_t srr1, addr;
267 long handled = 1;
268 struct mce_error_info mce_error_info = { 0 };
269
270 srr1 = regs->msr;
271
272 if (P7_SRR1_MC_LOADSTORE(srr1)) {
273 handled = mce_handle_derror_p8(regs->dsisr);
274 mce_get_derror_p8(&mce_error_info, regs->dsisr);
275 addr = regs->dar;
276 } else {
277 handled = mce_handle_ierror_p8(srr1);
278 mce_get_ierror_p8(&mce_error_info, srr1);
279 addr = regs->nip;
280 }
281
282 save_mce_event(regs, handled, &mce_error_info, addr);
283 return handled;
284}
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index e47d268727a4..7c6bb4b17b49 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -57,11 +57,14 @@ _GLOBAL(call_do_softirq)
57 mtlr r0 57 mtlr r0
58 blr 58 blr
59 59
60/*
61 * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
62 */
60_GLOBAL(call_do_irq) 63_GLOBAL(call_do_irq)
61 mflr r0 64 mflr r0
62 stw r0,4(r1) 65 stw r0,4(r1)
63 lwz r10,THREAD+KSP_LIMIT(r2) 66 lwz r10,THREAD+KSP_LIMIT(r2)
64 addi r11,r3,THREAD_INFO_GAP 67 addi r11,r4,THREAD_INFO_GAP
65 stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4) 68 stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
66 mr r1,r4 69 mr r1,r4
67 stw r10,8(r1) 70 stw r10,8(r1)
@@ -344,7 +347,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
344 */ 347 */
345_KPROBE(flush_icache_range) 348_KPROBE(flush_icache_range)
346BEGIN_FTR_SECTION 349BEGIN_FTR_SECTION
347 isync 350 PURGE_PREFETCHED_INS
348 blr /* for 601, do nothing */ 351 blr /* for 601, do nothing */
349END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) 352END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
350 li r5,L1_CACHE_BYTES-1 353 li r5,L1_CACHE_BYTES-1
@@ -448,6 +451,7 @@ _GLOBAL(invalidate_dcache_range)
448 */ 451 */
449_GLOBAL(__flush_dcache_icache) 452_GLOBAL(__flush_dcache_icache)
450BEGIN_FTR_SECTION 453BEGIN_FTR_SECTION
454 PURGE_PREFETCHED_INS
451 blr 455 blr
452END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) 456END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
453 rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */ 457 rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
@@ -489,6 +493,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
489 */ 493 */
490_GLOBAL(__flush_dcache_icache_phys) 494_GLOBAL(__flush_dcache_icache_phys)
491BEGIN_FTR_SECTION 495BEGIN_FTR_SECTION
496 PURGE_PREFETCHED_INS
492 blr /* for 601, do nothing */ 497 blr /* for 601, do nothing */
493END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) 498END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
494 mfmsr r10 499 mfmsr r10
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 64bf8db12b15..3d0249599d52 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -67,6 +67,7 @@ PPC64_CACHES:
67 67
68_KPROBE(flush_icache_range) 68_KPROBE(flush_icache_range)
69BEGIN_FTR_SECTION 69BEGIN_FTR_SECTION
70 PURGE_PREFETCHED_INS
70 blr 71 blr
71END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) 72END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
72/* 73/*
@@ -211,6 +212,11 @@ _GLOBAL(__flush_dcache_icache)
211 * Different systems have different cache line sizes 212 * Different systems have different cache line sizes
212 */ 213 */
213 214
215BEGIN_FTR_SECTION
216 PURGE_PREFETCHED_INS
217 blr
218END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
219
214/* Flush the dcache */ 220/* Flush the dcache */
215 ld r7,PPC64_CACHES@toc(r2) 221 ld r7,PPC64_CACHES@toc(r2)
216 clrrdi r3,r3,PAGE_SHIFT /* Page align */ 222 clrrdi r3,r3,PAGE_SHIFT /* Page align */
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0620eaaaad45..bf0aada02fe4 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -99,12 +99,28 @@ static inline void free_lppacas(void) { }
99 * 3 persistent SLBs are registered here. The buffer will be zero 99 * 3 persistent SLBs are registered here. The buffer will be zero
100 * initially, hence will all be invalid until we actually write them. 100 * initially, hence will all be invalid until we actually write them.
101 */ 101 */
102struct slb_shadow slb_shadow[] __cacheline_aligned = { 102static struct slb_shadow *slb_shadow;
103 [0 ... (NR_CPUS-1)] = { 103
104 .persistent = cpu_to_be32(SLB_NUM_BOLTED), 104static void __init allocate_slb_shadows(int nr_cpus, int limit)
105 .buffer_length = cpu_to_be32(sizeof(struct slb_shadow)), 105{
106 }, 106 int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
107}; 107 slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
108 memset(slb_shadow, 0, size);
109}
110
111static struct slb_shadow * __init init_slb_shadow(int cpu)
112{
113 struct slb_shadow *s = &slb_shadow[cpu];
114
115 s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
116 s->buffer_length = cpu_to_be32(sizeof(*s));
117
118 return s;
119}
120
121#else /* CONFIG_PPC_STD_MMU_64 */
122
123static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
108 124
109#endif /* CONFIG_PPC_STD_MMU_64 */ 125#endif /* CONFIG_PPC_STD_MMU_64 */
110 126
@@ -142,8 +158,13 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
142 new_paca->__current = &init_task; 158 new_paca->__current = &init_task;
143 new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; 159 new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
144#ifdef CONFIG_PPC_STD_MMU_64 160#ifdef CONFIG_PPC_STD_MMU_64
145 new_paca->slb_shadow_ptr = &slb_shadow[cpu]; 161 new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
146#endif /* CONFIG_PPC_STD_MMU_64 */ 162#endif /* CONFIG_PPC_STD_MMU_64 */
163
164#ifdef CONFIG_PPC_BOOK3E
165 /* For now -- if we have threads this will be adjusted later */
166 new_paca->tcd_ptr = &new_paca->tcd;
167#endif
147} 168}
148 169
149/* Put the paca pointer into r13 and SPRG_PACA */ 170/* Put the paca pointer into r13 and SPRG_PACA */
@@ -190,6 +211,8 @@ void __init allocate_pacas(void)
190 211
191 allocate_lppacas(nr_cpu_ids, limit); 212 allocate_lppacas(nr_cpu_ids, limit);
192 213
214 allocate_slb_shadows(nr_cpu_ids, limit);
215
193 /* Can't use for_each_*_cpu, as they aren't functional yet */ 216 /* Can't use for_each_*_cpu, as they aren't functional yet */
194 for (cpu = 0; cpu < nr_cpu_ids; cpu++) 217 for (cpu = 0; cpu < nr_cpu_ids; cpu++)
195 initialise_paca(&paca[cpu], cpu); 218 initialise_paca(&paca[cpu], cpu);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index a1e3e40ca3fd..d9476c1fc959 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -835,7 +835,7 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
835 * at 0 as unset as well, except if PCI_PROBE_ONLY is also set 835 * at 0 as unset as well, except if PCI_PROBE_ONLY is also set
836 * since in that case, we don't want to re-assign anything 836 * since in that case, we don't want to re-assign anything
837 */ 837 */
838 pcibios_resource_to_bus(dev, &reg, res); 838 pcibios_resource_to_bus(dev->bus, &reg, res);
839 if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) || 839 if (pci_has_flag(PCI_REASSIGN_ALL_RSRC) ||
840 (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) { 840 (reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
841 /* Only print message if not re-assigning */ 841 /* Only print message if not re-assigning */
@@ -886,7 +886,7 @@ static int pcibios_uninitialized_bridge_resource(struct pci_bus *bus,
886 886
887 /* Job is a bit different between memory and IO */ 887 /* Job is a bit different between memory and IO */
888 if (res->flags & IORESOURCE_MEM) { 888 if (res->flags & IORESOURCE_MEM) {
889 pcibios_resource_to_bus(dev, &region, res); 889 pcibios_resource_to_bus(dev->bus, &region, res);
890 890
891 /* If the BAR is non-0 then it's probably been initialized */ 891 /* If the BAR is non-0 then it's probably been initialized */
892 if (region.start != 0) 892 if (region.start != 0)
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index ac0b034f9ae0..83c26d829991 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -111,7 +111,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
111 res->name = pci_name(dev); 111 res->name = pci_name(dev);
112 region.start = base; 112 region.start = base;
113 region.end = base + size - 1; 113 region.end = base + size - 1;
114 pcibios_bus_to_resource(dev, res, &region); 114 pcibios_bus_to_resource(dev->bus, res, &region);
115 } 115 }
116} 116}
117 117
@@ -280,7 +280,7 @@ void of_scan_pci_bridge(struct pci_dev *dev)
280 res->flags = flags; 280 res->flags = flags;
281 region.start = of_read_number(&ranges[1], 2); 281 region.start = of_read_number(&ranges[1], 2);
282 region.end = region.start + size - 1; 282 region.end = region.start + size - 1;
283 pcibios_bus_to_resource(dev, res, &region); 283 pcibios_bus_to_resource(dev->bus, res, &region);
284 } 284 }
285 sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus), 285 sprintf(bus->name, "PCI Bus %04x:%02x", pci_domain_nr(bus),
286 bus->number); 286 bus->number);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4a96556fd2d4..af064d28b365 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -25,7 +25,6 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/user.h> 26#include <linux/user.h>
27#include <linux/elf.h> 27#include <linux/elf.h>
28#include <linux/init.h>
29#include <linux/prctl.h> 28#include <linux/prctl.h>
30#include <linux/init_task.h> 29#include <linux/init_task.h>
31#include <linux/export.h> 30#include <linux/export.h>
@@ -74,6 +73,48 @@ struct task_struct *last_task_used_vsx = NULL;
74struct task_struct *last_task_used_spe = NULL; 73struct task_struct *last_task_used_spe = NULL;
75#endif 74#endif
76 75
76#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
77void giveup_fpu_maybe_transactional(struct task_struct *tsk)
78{
79 /*
80 * If we are saving the current thread's registers, and the
81 * thread is in a transactional state, set the TIF_RESTORE_TM
82 * bit so that we know to restore the registers before
83 * returning to userspace.
84 */
85 if (tsk == current && tsk->thread.regs &&
86 MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
87 !test_thread_flag(TIF_RESTORE_TM)) {
88 tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
89 set_thread_flag(TIF_RESTORE_TM);
90 }
91
92 giveup_fpu(tsk);
93}
94
95void giveup_altivec_maybe_transactional(struct task_struct *tsk)
96{
97 /*
98 * If we are saving the current thread's registers, and the
99 * thread is in a transactional state, set the TIF_RESTORE_TM
100 * bit so that we know to restore the registers before
101 * returning to userspace.
102 */
103 if (tsk == current && tsk->thread.regs &&
104 MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
105 !test_thread_flag(TIF_RESTORE_TM)) {
106 tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
107 set_thread_flag(TIF_RESTORE_TM);
108 }
109
110 giveup_altivec(tsk);
111}
112
113#else
114#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk)
115#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk)
116#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
117
77#ifdef CONFIG_PPC_FPU 118#ifdef CONFIG_PPC_FPU
78/* 119/*
79 * Make sure the floating-point register state in the 120 * Make sure the floating-point register state in the
@@ -102,13 +143,13 @@ void flush_fp_to_thread(struct task_struct *tsk)
102 */ 143 */
103 BUG_ON(tsk != current); 144 BUG_ON(tsk != current);
104#endif 145#endif
105 giveup_fpu(tsk); 146 giveup_fpu_maybe_transactional(tsk);
106 } 147 }
107 preempt_enable(); 148 preempt_enable();
108 } 149 }
109} 150}
110EXPORT_SYMBOL_GPL(flush_fp_to_thread); 151EXPORT_SYMBOL_GPL(flush_fp_to_thread);
111#endif 152#endif /* CONFIG_PPC_FPU */
112 153
113void enable_kernel_fp(void) 154void enable_kernel_fp(void)
114{ 155{
@@ -116,11 +157,11 @@ void enable_kernel_fp(void)
116 157
117#ifdef CONFIG_SMP 158#ifdef CONFIG_SMP
118 if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) 159 if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
119 giveup_fpu(current); 160 giveup_fpu_maybe_transactional(current);
120 else 161 else
121 giveup_fpu(NULL); /* just enables FP for kernel */ 162 giveup_fpu(NULL); /* just enables FP for kernel */
122#else 163#else
123 giveup_fpu(last_task_used_math); 164 giveup_fpu_maybe_transactional(last_task_used_math);
124#endif /* CONFIG_SMP */ 165#endif /* CONFIG_SMP */
125} 166}
126EXPORT_SYMBOL(enable_kernel_fp); 167EXPORT_SYMBOL(enable_kernel_fp);
@@ -132,11 +173,11 @@ void enable_kernel_altivec(void)
132 173
133#ifdef CONFIG_SMP 174#ifdef CONFIG_SMP
134 if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) 175 if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
135 giveup_altivec(current); 176 giveup_altivec_maybe_transactional(current);
136 else 177 else
137 giveup_altivec_notask(); 178 giveup_altivec_notask();
138#else 179#else
139 giveup_altivec(last_task_used_altivec); 180 giveup_altivec_maybe_transactional(last_task_used_altivec);
140#endif /* CONFIG_SMP */ 181#endif /* CONFIG_SMP */
141} 182}
142EXPORT_SYMBOL(enable_kernel_altivec); 183EXPORT_SYMBOL(enable_kernel_altivec);
@@ -153,7 +194,7 @@ void flush_altivec_to_thread(struct task_struct *tsk)
153#ifdef CONFIG_SMP 194#ifdef CONFIG_SMP
154 BUG_ON(tsk != current); 195 BUG_ON(tsk != current);
155#endif 196#endif
156 giveup_altivec(tsk); 197 giveup_altivec_maybe_transactional(tsk);
157 } 198 }
158 preempt_enable(); 199 preempt_enable();
159 } 200 }
@@ -182,8 +223,8 @@ EXPORT_SYMBOL(enable_kernel_vsx);
182 223
183void giveup_vsx(struct task_struct *tsk) 224void giveup_vsx(struct task_struct *tsk)
184{ 225{
185 giveup_fpu(tsk); 226 giveup_fpu_maybe_transactional(tsk);
186 giveup_altivec(tsk); 227 giveup_altivec_maybe_transactional(tsk);
187 __giveup_vsx(tsk); 228 __giveup_vsx(tsk);
188} 229}
189 230
@@ -479,7 +520,48 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
479 return false; 520 return false;
480 return true; 521 return true;
481} 522}
523
482#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 524#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
525static void tm_reclaim_thread(struct thread_struct *thr,
526 struct thread_info *ti, uint8_t cause)
527{
528 unsigned long msr_diff = 0;
529
530 /*
531 * If FP/VSX registers have been already saved to the
532 * thread_struct, move them to the transact_fp array.
533 * We clear the TIF_RESTORE_TM bit since after the reclaim
534 * the thread will no longer be transactional.
535 */
536 if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) {
537 msr_diff = thr->tm_orig_msr & ~thr->regs->msr;
538 if (msr_diff & MSR_FP)
539 memcpy(&thr->transact_fp, &thr->fp_state,
540 sizeof(struct thread_fp_state));
541 if (msr_diff & MSR_VEC)
542 memcpy(&thr->transact_vr, &thr->vr_state,
543 sizeof(struct thread_vr_state));
544 clear_ti_thread_flag(ti, TIF_RESTORE_TM);
545 msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
546 }
547
548 tm_reclaim(thr, thr->regs->msr, cause);
549
550 /* Having done the reclaim, we now have the checkpointed
551 * FP/VSX values in the registers. These might be valid
552 * even if we have previously called enable_kernel_fp() or
553 * flush_fp_to_thread(), so update thr->regs->msr to
554 * indicate their current validity.
555 */
556 thr->regs->msr |= msr_diff;
557}
558
559void tm_reclaim_current(uint8_t cause)
560{
561 tm_enable();
562 tm_reclaim_thread(&current->thread, current_thread_info(), cause);
563}
564
483static inline void tm_reclaim_task(struct task_struct *tsk) 565static inline void tm_reclaim_task(struct task_struct *tsk)
484{ 566{
485 /* We have to work out if we're switching from/to a task that's in the 567 /* We have to work out if we're switching from/to a task that's in the
@@ -502,9 +584,11 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
502 584
503 /* Stash the original thread MSR, as giveup_fpu et al will 585 /* Stash the original thread MSR, as giveup_fpu et al will
504 * modify it. We hold onto it to see whether the task used 586 * modify it. We hold onto it to see whether the task used
505 * FP & vector regs. 587 * FP & vector regs. If the TIF_RESTORE_TM flag is set,
588 * tm_orig_msr is already set.
506 */ 589 */
507 thr->tm_orig_msr = thr->regs->msr; 590 if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM))
591 thr->tm_orig_msr = thr->regs->msr;
508 592
509 TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, " 593 TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
510 "ccr=%lx, msr=%lx, trap=%lx)\n", 594 "ccr=%lx, msr=%lx, trap=%lx)\n",
@@ -512,7 +596,7 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
512 thr->regs->ccr, thr->regs->msr, 596 thr->regs->ccr, thr->regs->msr,
513 thr->regs->trap); 597 thr->regs->trap);
514 598
515 tm_reclaim(thr, thr->regs->msr, TM_CAUSE_RESCHED); 599 tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED);
516 600
517 TM_DEBUG("--- tm_reclaim on pid %d complete\n", 601 TM_DEBUG("--- tm_reclaim on pid %d complete\n",
518 tsk->pid); 602 tsk->pid);
@@ -588,6 +672,43 @@ static inline void __switch_to_tm(struct task_struct *prev)
588 tm_reclaim_task(prev); 672 tm_reclaim_task(prev);
589 } 673 }
590} 674}
675
676/*
677 * This is called if we are on the way out to userspace and the
678 * TIF_RESTORE_TM flag is set. It checks if we need to reload
679 * FP and/or vector state and does so if necessary.
680 * If userspace is inside a transaction (whether active or
681 * suspended) and FP/VMX/VSX instructions have ever been enabled
682 * inside that transaction, then we have to keep them enabled
683 * and keep the FP/VMX/VSX state loaded for as long as the transaction
684 * continues. The reason is that if we didn't, and subsequently
685 * got a FP/VMX/VSX unavailable interrupt inside a transaction,
686 * we don't know whether it's the same transaction, and thus we
687 * don't know which of the checkpointed state and the transactional
688 * state to use.
689 */
690void restore_tm_state(struct pt_regs *regs)
691{
692 unsigned long msr_diff;
693
694 clear_thread_flag(TIF_RESTORE_TM);
695 if (!MSR_TM_ACTIVE(regs->msr))
696 return;
697
698 msr_diff = current->thread.tm_orig_msr & ~regs->msr;
699 msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
700 if (msr_diff & MSR_FP) {
701 fp_enable();
702 load_fp_state(&current->thread.fp_state);
703 regs->msr |= current->thread.fpexc_mode;
704 }
705 if (msr_diff & MSR_VEC) {
706 vec_enable();
707 load_vr_state(&current->thread.vr_state);
708 }
709 regs->msr |= msr_diff;
710}
711
591#else 712#else
592#define tm_recheckpoint_new_task(new) 713#define tm_recheckpoint_new_task(new)
593#define __switch_to_tm(prev) 714#define __switch_to_tm(prev)
@@ -690,7 +811,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
690 * schedule DABR 811 * schedule DABR
691 */ 812 */
692#ifndef CONFIG_HAVE_HW_BREAKPOINT 813#ifndef CONFIG_HAVE_HW_BREAKPOINT
693 if (unlikely(hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk))) 814 if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
694 set_breakpoint(&new->thread.hw_brk); 815 set_breakpoint(&new->thread.hw_brk);
695#endif /* CONFIG_HAVE_HW_BREAKPOINT */ 816#endif /* CONFIG_HAVE_HW_BREAKPOINT */
696#endif 817#endif
@@ -927,6 +1048,15 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
927 flush_altivec_to_thread(src); 1048 flush_altivec_to_thread(src);
928 flush_vsx_to_thread(src); 1049 flush_vsx_to_thread(src);
929 flush_spe_to_thread(src); 1050 flush_spe_to_thread(src);
1051 /*
1052 * Flush TM state out so we can copy it. __switch_to_tm() does this
1053 * flush but it removes the checkpointed state from the current CPU and
1054 * transitions the CPU out of TM mode. Hence we need to call
1055 * tm_recheckpoint_new_task() (on the same task) to restore the
1056 * checkpointed state back and the TM mode.
1057 */
1058 __switch_to_tm(src);
1059 tm_recheckpoint_new_task(src);
930 1060
931 *dst = *src; 1061 *dst = *src;
932 1062
@@ -1175,6 +1305,19 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
1175 if (val & PR_FP_EXC_SW_ENABLE) { 1305 if (val & PR_FP_EXC_SW_ENABLE) {
1176#ifdef CONFIG_SPE 1306#ifdef CONFIG_SPE
1177 if (cpu_has_feature(CPU_FTR_SPE)) { 1307 if (cpu_has_feature(CPU_FTR_SPE)) {
1308 /*
1309 * When the sticky exception bits are set
1310 * directly by userspace, it must call prctl
1311 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
1312 * in the existing prctl settings) or
1313 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
1314 * the bits being set). <fenv.h> functions
1315 * saving and restoring the whole
1316 * floating-point environment need to do so
1317 * anyway to restore the prctl settings from
1318 * the saved environment.
1319 */
1320 tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
1178 tsk->thread.fpexc_mode = val & 1321 tsk->thread.fpexc_mode = val &
1179 (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); 1322 (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
1180 return 0; 1323 return 0;
@@ -1206,9 +1349,22 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
1206 1349
1207 if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) 1350 if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
1208#ifdef CONFIG_SPE 1351#ifdef CONFIG_SPE
1209 if (cpu_has_feature(CPU_FTR_SPE)) 1352 if (cpu_has_feature(CPU_FTR_SPE)) {
1353 /*
1354 * When the sticky exception bits are set
1355 * directly by userspace, it must call prctl
1356 * with PR_GET_FPEXC (with PR_FP_EXC_SW_ENABLE
1357 * in the existing prctl settings) or
1358 * PR_SET_FPEXC (with PR_FP_EXC_SW_ENABLE in
1359 * the bits being set). <fenv.h> functions
1360 * saving and restoring the whole
1361 * floating-point environment need to do so
1362 * anyway to restore the prctl settings from
1363 * the saved environment.
1364 */
1365 tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
1210 val = tsk->thread.fpexc_mode; 1366 val = tsk->thread.fpexc_mode;
1211 else 1367 } else
1212 return -EINVAL; 1368 return -EINVAL;
1213#else 1369#else
1214 return -EINVAL; 1370 return -EINVAL;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa0ad8aafbcc..f58c0d3aaeb4 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -523,6 +523,20 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node,
523 return early_init_dt_scan_memory(node, uname, depth, data); 523 return early_init_dt_scan_memory(node, uname, depth, data);
524} 524}
525 525
526/*
527 * For a relocatable kernel, we need to get the memstart_addr first,
528 * then use it to calculate the virtual kernel start address. This has
529 * to happen at a very early stage (before machine_init). In this case,
530 * we just want to get memstart_addr and do not want to disturb the
531 * memblock at this stage, so introduce a variable that lets us skip the
532 * memblock_add() calls.
533 */
534#ifdef CONFIG_RELOCATABLE
535static int add_mem_to_memblock = 1;
536#else
537#define add_mem_to_memblock 1
538#endif
539
526void __init early_init_dt_add_memory_arch(u64 base, u64 size) 540void __init early_init_dt_add_memory_arch(u64 base, u64 size)
527{ 541{
528#ifdef CONFIG_PPC64 542#ifdef CONFIG_PPC64
@@ -543,7 +557,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
543 } 557 }
544 558
545 /* Add the chunk to the MEMBLOCK list */ 559 /* Add the chunk to the MEMBLOCK list */
546 memblock_add(base, size); 560 if (add_mem_to_memblock)
561 memblock_add(base, size);
547} 562}
548 563
549static void __init early_reserve_mem_dt(void) 564static void __init early_reserve_mem_dt(void)
@@ -740,6 +755,30 @@ void __init early_init_devtree(void *params)
740 DBG(" <- early_init_devtree()\n"); 755 DBG(" <- early_init_devtree()\n");
741} 756}
742 757
758#ifdef CONFIG_RELOCATABLE
759/*
760 * This function runs before early_init_devtree, so we have to init
761 * initial_boot_params.
762 */
763void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
764{
765 /* Setup flat device-tree pointer */
766 initial_boot_params = params;
767
768 /*
769 * Scan the memory nodes and set add_mem_to_memblock to 0 to avoid
770 * messing up the memblock.
771 */
772 add_mem_to_memblock = 0;
773 of_scan_flat_dt(early_init_dt_scan_root, NULL);
774 of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
775 add_mem_to_memblock = 1;
776
777 if (size)
778 *size = first_memblock_size;
779}
780#endif
781
743/******* 782/*******
744 * 783 *
745 * New implementation of the OF "find" APIs, return a refcounted 784 * New implementation of the OF "find" APIs, return a refcounted
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index cb64a6e1dc51..078145acf7fb 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1986,19 +1986,23 @@ static void __init prom_init_stdout(void)
1986 /* Get the full OF pathname of the stdout device */ 1986 /* Get the full OF pathname of the stdout device */
1987 memset(path, 0, 256); 1987 memset(path, 0, 256);
1988 call_prom("instance-to-path", 3, 1, prom.stdout, path, 255); 1988 call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
1989 stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
1990 val = cpu_to_be32(stdout_node);
1991 prom_setprop(prom.chosen, "/chosen", "linux,stdout-package",
1992 &val, sizeof(val));
1993 prom_printf("OF stdout device is: %s\n", of_stdout_device); 1989 prom_printf("OF stdout device is: %s\n", of_stdout_device);
1994 prom_setprop(prom.chosen, "/chosen", "linux,stdout-path", 1990 prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",
1995 path, strlen(path) + 1); 1991 path, strlen(path) + 1);
1996 1992
1997 /* If it's a display, note it */ 1993 /* instance-to-package fails on PA-Semi */
1998 memset(type, 0, sizeof(type)); 1994 stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
1999 prom_getprop(stdout_node, "device_type", type, sizeof(type)); 1995 if (stdout_node != PROM_ERROR) {
2000 if (strcmp(type, "display") == 0) 1996 val = cpu_to_be32(stdout_node);
2001 prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0); 1997 prom_setprop(prom.chosen, "/chosen", "linux,stdout-package",
1998 &val, sizeof(val));
1999
2000 /* If it's a display, note it */
2001 memset(type, 0, sizeof(type));
2002 prom_getprop(stdout_node, "device_type", type, sizeof(type));
2003 if (strcmp(type, "display") == 0)
2004 prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
2005 }
2002} 2006}
2003 2007
2004static int __init prom_find_machine_type(void) 2008static int __init prom_find_machine_type(void)
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index b47a0e1ab001..d88736fbece6 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -69,8 +69,8 @@ _GLOBAL(relocate)
69 * R_PPC64_RELATIVE ones. 69 * R_PPC64_RELATIVE ones.
70 */ 70 */
71 mtctr r8 71 mtctr r8
725: lwz r0,12(9) /* ELF64_R_TYPE(reloc->r_info) */ 725: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */
73 cmpwi r0,R_PPC64_RELATIVE 73 cmpdi r0,R_PPC64_RELATIVE
74 bne 6f 74 bne 6f
75 ld r6,0(r9) /* reloc->r_offset */ 75 ld r6,0(r9) /* reloc->r_offset */
76 ld r0,16(r9) /* reloc->r_addend */ 76 ld r0,16(r9) /* reloc->r_addend */
@@ -81,6 +81,7 @@ _GLOBAL(relocate)
81 81
826: blr 826: blr
83 83
84.balign 8
84p_dyn: .llong __dynamic_start - 0b 85p_dyn: .llong __dynamic_start - 0b
85p_rela: .llong __rela_dyn_start - 0b 86p_rela: .llong __rela_dyn_start - 0b
86p_st: .llong _stext - 0b 87p_st: .llong _stext - 0b
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index b903dc5cf944..04cc4fcca78b 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -247,7 +247,12 @@ static void __init exc_lvl_early_init(void)
247 /* interrupt stacks must be in lowmem, we get that for free on ppc32 247 /* interrupt stacks must be in lowmem, we get that for free on ppc32
248 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ 248 * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */
249 for_each_possible_cpu(i) { 249 for_each_possible_cpu(i) {
250#ifdef CONFIG_SMP
250 hw_cpu = get_hard_smp_processor_id(i); 251 hw_cpu = get_hard_smp_processor_id(i);
252#else
253 hw_cpu = 0;
254#endif
255
251 critirq_ctx[hw_cpu] = (struct thread_info *) 256 critirq_ctx[hw_cpu] = (struct thread_info *)
252 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); 257 __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
253#ifdef CONFIG_BOOKE 258#ifdef CONFIG_BOOKE
@@ -296,9 +301,6 @@ void __init setup_arch(char **cmdline_p)
296 if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) 301 if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
297 ucache_bsize = icache_bsize = dcache_bsize; 302 ucache_bsize = icache_bsize = dcache_bsize;
298 303
299 /* reboot on panic */
300 panic_timeout = 180;
301
302 if (ppc_md.panic) 304 if (ppc_md.panic)
303 setup_panic(); 305 setup_panic();
304 306
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 4085aaa9478f..f5f11a7d30e5 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -97,6 +97,36 @@ int dcache_bsize;
97int icache_bsize; 97int icache_bsize;
98int ucache_bsize; 98int ucache_bsize;
99 99
100#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
101static void setup_tlb_core_data(void)
102{
103 int cpu;
104
105 for_each_possible_cpu(cpu) {
106 int first = cpu_first_thread_sibling(cpu);
107
108 paca[cpu].tcd_ptr = &paca[first].tcd;
109
110 /*
111 * If we have threads, we need either tlbsrx.
112 * or e6500 tablewalk mode, or else TLB handlers
113 * will be racy and could produce duplicate entries.
114 */
115 if (smt_enabled_at_boot >= 2 &&
116 !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
117 book3e_htw_mode != PPC_HTW_E6500) {
118 /* Should we panic instead? */
119 WARN_ONCE(1, "%s: unsupported MMU configuration -- expect problems\n",
120 __func__);
121 }
122 }
123}
124#else
125static void setup_tlb_core_data(void)
126{
127}
128#endif
129
100#ifdef CONFIG_SMP 130#ifdef CONFIG_SMP
101 131
102static char *smt_enabled_cmdline; 132static char *smt_enabled_cmdline;
@@ -445,6 +475,7 @@ void __init setup_system(void)
445 475
446 smp_setup_cpu_maps(); 476 smp_setup_cpu_maps();
447 check_smt_enabled(); 477 check_smt_enabled();
478 setup_tlb_core_data();
448 479
449#ifdef CONFIG_SMP 480#ifdef CONFIG_SMP
450 /* Release secondary cpus out of their spinloops at 0x60 now that 481 /* Release secondary cpus out of their spinloops at 0x60 now that
@@ -520,9 +551,6 @@ static void __init irqstack_early_init(void)
520#ifdef CONFIG_PPC_BOOK3E 551#ifdef CONFIG_PPC_BOOK3E
521static void __init exc_lvl_early_init(void) 552static void __init exc_lvl_early_init(void)
522{ 553{
523 extern unsigned int interrupt_base_book3e;
524 extern unsigned int exc_debug_debug_book3e;
525
526 unsigned int i; 554 unsigned int i;
527 555
528 for_each_possible_cpu(i) { 556 for_each_possible_cpu(i) {
@@ -535,8 +563,7 @@ static void __init exc_lvl_early_init(void)
535 } 563 }
536 564
537 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC)) 565 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
538 patch_branch(&interrupt_base_book3e + (0x040 / 4) + 1, 566 patch_exception(0x040, exc_debug_debug_book3e);
539 (unsigned long)&exc_debug_debug_book3e, 0);
540} 567}
541#else 568#else
542#define exc_lvl_early_init() 569#define exc_lvl_early_init()
@@ -544,7 +571,8 @@ static void __init exc_lvl_early_init(void)
544 571
545/* 572/*
546 * Stack space used when we detect a bad kernel stack pointer, and 573 * Stack space used when we detect a bad kernel stack pointer, and
547 * early in SMP boots before relocation is enabled. 574 * early in SMP boots before relocation is enabled. Exclusive emergency
575 * stack for machine checks.
548 */ 576 */
549static void __init emergency_stack_init(void) 577static void __init emergency_stack_init(void)
550{ 578{
@@ -567,6 +595,13 @@ static void __init emergency_stack_init(void)
567 sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit); 595 sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
568 sp += THREAD_SIZE; 596 sp += THREAD_SIZE;
569 paca[i].emergency_sp = __va(sp); 597 paca[i].emergency_sp = __va(sp);
598
599#ifdef CONFIG_PPC_BOOK3S_64
600 /* emergency stack for machine check exception handling. */
601 sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
602 sp += THREAD_SIZE;
603 paca[i].mc_emergency_sp = __va(sp);
604#endif
570 } 605 }
571} 606}
572 607
@@ -588,9 +623,6 @@ void __init setup_arch(char **cmdline_p)
588 dcache_bsize = ppc64_caches.dline_size; 623 dcache_bsize = ppc64_caches.dline_size;
589 icache_bsize = ppc64_caches.iline_size; 624 icache_bsize = ppc64_caches.iline_size;
590 625
591 /* reboot on panic */
592 panic_timeout = 180;
593
594 if (ppc_md.panic) 626 if (ppc_md.panic)
595 setup_panic(); 627 setup_panic();
596 628
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 457e97aa2945..8fc4177ed65a 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -203,8 +203,7 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs)
203 203
204#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 204#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
205 if (MSR_TM_ACTIVE(regs->msr)) { 205 if (MSR_TM_ACTIVE(regs->msr)) {
206 tm_enable(); 206 tm_reclaim_current(TM_CAUSE_SIGNAL);
207 tm_reclaim(&current->thread, regs->msr, TM_CAUSE_SIGNAL);
208 if (MSR_TM_TRANSACTIONAL(regs->msr)) 207 if (MSR_TM_TRANSACTIONAL(regs->msr))
209 return current->thread.ckpt_regs.gpr[1]; 208 return current->thread.ckpt_regs.gpr[1];
210 } 209 }
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 68027bfa5f8e..a67e00aa3caa 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -519,6 +519,13 @@ static int save_tm_user_regs(struct pt_regs *regs,
519{ 519{
520 unsigned long msr = regs->msr; 520 unsigned long msr = regs->msr;
521 521
522 /* Remove TM bits from thread's MSR. The MSR in the sigcontext
523 * just indicates to userland that we were doing a transaction, but we
524 * don't want to return in transactional state. This also ensures
525 * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
526 */
527 regs->msr &= ~MSR_TS_MASK;
528
522 /* Make sure floating point registers are stored in regs */ 529 /* Make sure floating point registers are stored in regs */
523 flush_fp_to_thread(current); 530 flush_fp_to_thread(current);
524 531
@@ -1015,29 +1022,24 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
1015#ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1022#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1016 tm_frame = &rt_sf->uc_transact.uc_mcontext; 1023 tm_frame = &rt_sf->uc_transact.uc_mcontext;
1017 if (MSR_TM_ACTIVE(regs->msr)) { 1024 if (MSR_TM_ACTIVE(regs->msr)) {
1025 if (__put_user((unsigned long)&rt_sf->uc_transact,
1026 &rt_sf->uc.uc_link) ||
1027 __put_user((unsigned long)tm_frame,
1028 &rt_sf->uc_transact.uc_regs))
1029 goto badframe;
1018 if (save_tm_user_regs(regs, frame, tm_frame, sigret)) 1030 if (save_tm_user_regs(regs, frame, tm_frame, sigret))
1019 goto badframe; 1031 goto badframe;
1020 } 1032 }
1021 else 1033 else
1022#endif 1034#endif
1023 { 1035 {
1036 if (__put_user(0, &rt_sf->uc.uc_link))
1037 goto badframe;
1024 if (save_user_regs(regs, frame, tm_frame, sigret, 1)) 1038 if (save_user_regs(regs, frame, tm_frame, sigret, 1))
1025 goto badframe; 1039 goto badframe;
1026 } 1040 }
1027 regs->link = tramp; 1041 regs->link = tramp;
1028 1042
1029#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1030 if (MSR_TM_ACTIVE(regs->msr)) {
1031 if (__put_user((unsigned long)&rt_sf->uc_transact,
1032 &rt_sf->uc.uc_link)
1033 || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs))
1034 goto badframe;
1035 }
1036 else
1037#endif
1038 if (__put_user(0, &rt_sf->uc.uc_link))
1039 goto badframe;
1040
1041 current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */ 1043 current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
1042 1044
1043 /* create a stack frame for the caller of the handler */ 1045 /* create a stack frame for the caller of the handler */
@@ -1056,13 +1058,6 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
1056 /* enter the signal handler in native-endian mode */ 1058 /* enter the signal handler in native-endian mode */
1057 regs->msr &= ~MSR_LE; 1059 regs->msr &= ~MSR_LE;
1058 regs->msr |= (MSR_KERNEL & MSR_LE); 1060 regs->msr |= (MSR_KERNEL & MSR_LE);
1059#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1060 /* Remove TM bits from thread's MSR. The MSR in the sigcontext
1061 * just indicates to userland that we were doing a transaction, but we
1062 * don't want to return in transactional state:
1063 */
1064 regs->msr &= ~MSR_TS_MASK;
1065#endif
1066 return 1; 1061 return 1;
1067 1062
1068badframe: 1063badframe:
@@ -1484,13 +1479,6 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
1484 regs->nip = (unsigned long) ka->sa.sa_handler; 1479 regs->nip = (unsigned long) ka->sa.sa_handler;
1485 /* enter the signal handler in big-endian mode */ 1480 /* enter the signal handler in big-endian mode */
1486 regs->msr &= ~MSR_LE; 1481 regs->msr &= ~MSR_LE;
1487#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1488 /* Remove TM bits from thread's MSR. The MSR in the sigcontext
1489 * just indicates to userland that we were doing a transaction, but we
1490 * don't want to return in transactional state:
1491 */
1492 regs->msr &= ~MSR_TS_MASK;
1493#endif
1494 return 1; 1482 return 1;
1495 1483
1496badframe: 1484badframe:
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 42991045349f..8d253c29649b 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -65,8 +65,8 @@ struct rt_sigframe {
65 struct siginfo __user *pinfo; 65 struct siginfo __user *pinfo;
66 void __user *puc; 66 void __user *puc;
67 struct siginfo info; 67 struct siginfo info;
68 /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */ 68 /* New 64 bit little-endian ABI allows redzone of 512 bytes below sp */
69 char abigap[288]; 69 char abigap[USER_REDZONE_SIZE];
70} __attribute__ ((aligned (16))); 70} __attribute__ ((aligned (16)));
71 71
72static const char fmt32[] = KERN_INFO \ 72static const char fmt32[] = KERN_INFO \
@@ -192,6 +192,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
192 192
193 BUG_ON(!MSR_TM_ACTIVE(regs->msr)); 193 BUG_ON(!MSR_TM_ACTIVE(regs->msr));
194 194
195 /* Remove TM bits from thread's MSR. The MSR in the sigcontext
196 * just indicates to userland that we were doing a transaction, but we
197 * don't want to return in transactional state. This also ensures
198 * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
199 */
200 regs->msr &= ~MSR_TS_MASK;
201
195 flush_fp_to_thread(current); 202 flush_fp_to_thread(current);
196 203
197#ifdef CONFIG_ALTIVEC 204#ifdef CONFIG_ALTIVEC
@@ -749,13 +756,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
749 756
750 /* Make sure signal handler doesn't get spurious FP exceptions */ 757 /* Make sure signal handler doesn't get spurious FP exceptions */
751 current->thread.fp_state.fpscr = 0; 758 current->thread.fp_state.fpscr = 0;
752#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
753 /* Remove TM bits from thread's MSR. The MSR in the sigcontext
754 * just indicates to userland that we were doing a transaction, but we
755 * don't want to return in transactional state:
756 */
757 regs->msr &= ~MSR_TS_MASK;
758#endif
759 759
760 /* Set up to return from userspace. */ 760 /* Set up to return from userspace. */
761 if (vdso64_rt_sigtramp && current->mm->context.vdso_base) { 761 if (vdso64_rt_sigtramp && current->mm->context.vdso_base) {
diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c
index e68fd1ae727a..7a37ecd3afa3 100644
--- a/arch/powerpc/kernel/smp-tbsync.c
+++ b/arch/powerpc/kernel/smp-tbsync.c
@@ -9,7 +9,6 @@
9#include <linux/sched.h> 9#include <linux/sched.h>
10#include <linux/smp.h> 10#include <linux/smp.h>
11#include <linux/unistd.h> 11#include <linux/unistd.h>
12#include <linux/init.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
14#include <linux/atomic.h> 13#include <linux/atomic.h>
15#include <asm/smp.h> 14#include <asm/smp.h>
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index c1cf4a1522d9..ac2621af3154 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -369,13 +369,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
369 cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); 369 cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
370 cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid)); 370 cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
371 371
372 if (smp_ops) 372 if (smp_ops && smp_ops->probe)
373 if (smp_ops->probe) 373 smp_ops->probe();
374 max_cpus = smp_ops->probe();
375 else
376 max_cpus = NR_CPUS;
377 else
378 max_cpus = 1;
379} 374}
380 375
381void smp_prepare_boot_cpu(void) 376void smp_prepare_boot_cpu(void)
diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_booke.S
index 0f204053e5b5..553c1405ee05 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_booke.S
@@ -74,21 +74,21 @@ _GLOBAL(swsusp_arch_suspend)
74 bne 1b 74 bne 1b
75 75
76 /* Save SPRGs */ 76 /* Save SPRGs */
77 mfsprg r4,0 77 mfspr r4,SPRN_SPRG0
78 stw r4,SL_SPRG0(r11) 78 stw r4,SL_SPRG0(r11)
79 mfsprg r4,1 79 mfspr r4,SPRN_SPRG1
80 stw r4,SL_SPRG1(r11) 80 stw r4,SL_SPRG1(r11)
81 mfsprg r4,2 81 mfspr r4,SPRN_SPRG2
82 stw r4,SL_SPRG2(r11) 82 stw r4,SL_SPRG2(r11)
83 mfsprg r4,3 83 mfspr r4,SPRN_SPRG3
84 stw r4,SL_SPRG3(r11) 84 stw r4,SL_SPRG3(r11)
85 mfsprg r4,4 85 mfspr r4,SPRN_SPRG4
86 stw r4,SL_SPRG4(r11) 86 stw r4,SL_SPRG4(r11)
87 mfsprg r4,5 87 mfspr r4,SPRN_SPRG5
88 stw r4,SL_SPRG5(r11) 88 stw r4,SL_SPRG5(r11)
89 mfsprg r4,6 89 mfspr r4,SPRN_SPRG6
90 stw r4,SL_SPRG6(r11) 90 stw r4,SL_SPRG6(r11)
91 mfsprg r4,7 91 mfspr r4,SPRN_SPRG7
92 stw r4,SL_SPRG7(r11) 92 stw r4,SL_SPRG7(r11)
93 93
94 /* Call the low level suspend stuff (we should probably have made 94 /* Call the low level suspend stuff (we should probably have made
@@ -150,21 +150,21 @@ _GLOBAL(swsusp_arch_resume)
150 bl _tlbil_all 150 bl _tlbil_all
151 151
152 lwz r4,SL_SPRG0(r11) 152 lwz r4,SL_SPRG0(r11)
153 mtsprg 0,r4 153 mtspr SPRN_SPRG0,r4
154 lwz r4,SL_SPRG1(r11) 154 lwz r4,SL_SPRG1(r11)
155 mtsprg 1,r4 155 mtspr SPRN_SPRG1,r4
156 lwz r4,SL_SPRG2(r11) 156 lwz r4,SL_SPRG2(r11)
157 mtsprg 2,r4 157 mtspr SPRN_SPRG2,r4
158 lwz r4,SL_SPRG3(r11) 158 lwz r4,SL_SPRG3(r11)
159 mtsprg 3,r4 159 mtspr SPRN_SPRG3,r4
160 lwz r4,SL_SPRG4(r11) 160 lwz r4,SL_SPRG4(r11)
161 mtsprg 4,r4 161 mtspr SPRN_SPRG4,r4
162 lwz r4,SL_SPRG5(r11) 162 lwz r4,SL_SPRG5(r11)
163 mtsprg 5,r4 163 mtspr SPRN_SPRG5,r4
164 lwz r4,SL_SPRG6(r11) 164 lwz r4,SL_SPRG6(r11)
165 mtsprg 6,r4 165 mtspr SPRN_SPRG6,r4
166 lwz r4,SL_SPRG7(r11) 166 lwz r4,SL_SPRG7(r11)
167 mtsprg 7,r4 167 mtspr SPRN_SPRG7,r4
168 168
169 /* restore the MSR */ 169 /* restore the MSR */
170 lwz r3,SL_MSR(r11) 170 lwz r3,SL_MSR(r11)
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index 4e3cc47f26b9..cd9be9aa016d 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -34,7 +34,6 @@
34#include <linux/ipc.h> 34#include <linux/ipc.h>
35#include <linux/utsname.h> 35#include <linux/utsname.h>
36#include <linux/file.h> 36#include <linux/file.h>
37#include <linux/init.h>
38#include <linux/personality.h> 37#include <linux/personality.h>
39 38
40#include <asm/uaccess.h> 39#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index b4e667663d9b..97e1dc917683 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -51,8 +51,6 @@ static ssize_t store_smt_snooze_delay(struct device *dev,
51 return -EINVAL; 51 return -EINVAL;
52 52
53 per_cpu(smt_snooze_delay, cpu->dev.id) = snooze; 53 per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
54 update_smt_snooze_delay(cpu->dev.id, snooze);
55
56 return count; 54 return count;
57} 55}
58 56
@@ -86,6 +84,304 @@ __setup("smt-snooze-delay=", setup_smt_snooze_delay);
86 84
87#endif /* CONFIG_PPC64 */ 85#endif /* CONFIG_PPC64 */
88 86
87#ifdef CONFIG_PPC_FSL_BOOK3E
88#define MAX_BIT 63
89
90static u64 pw20_wt;
91static u64 altivec_idle_wt;
92
93static unsigned int get_idle_ticks_bit(u64 ns)
94{
95 u64 cycle;
96
97 if (ns >= 10000)
98 cycle = div_u64(ns + 500, 1000) * tb_ticks_per_usec;
99 else
100 cycle = div_u64(ns * tb_ticks_per_usec, 1000);
101
102 if (!cycle)
103 return 0;
104
105 return ilog2(cycle);
106}
107
108static void do_show_pwrmgtcr0(void *val)
109{
110 u32 *value = val;
111
112 *value = mfspr(SPRN_PWRMGTCR0);
113}
114
115static ssize_t show_pw20_state(struct device *dev,
116 struct device_attribute *attr, char *buf)
117{
118 u32 value;
119 unsigned int cpu = dev->id;
120
121 smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
122
123 value &= PWRMGTCR0_PW20_WAIT;
124
125 return sprintf(buf, "%u\n", value ? 1 : 0);
126}
127
128static void do_store_pw20_state(void *val)
129{
130 u32 *value = val;
131 u32 pw20_state;
132
133 pw20_state = mfspr(SPRN_PWRMGTCR0);
134
135 if (*value)
136 pw20_state |= PWRMGTCR0_PW20_WAIT;
137 else
138 pw20_state &= ~PWRMGTCR0_PW20_WAIT;
139
140 mtspr(SPRN_PWRMGTCR0, pw20_state);
141}
142
143static ssize_t store_pw20_state(struct device *dev,
144 struct device_attribute *attr,
145 const char *buf, size_t count)
146{
147 u32 value;
148 unsigned int cpu = dev->id;
149
150 if (kstrtou32(buf, 0, &value))
151 return -EINVAL;
152
153 if (value > 1)
154 return -EINVAL;
155
156 smp_call_function_single(cpu, do_store_pw20_state, &value, 1);
157
158 return count;
159}
160
161static ssize_t show_pw20_wait_time(struct device *dev,
162 struct device_attribute *attr, char *buf)
163{
164 u32 value;
165 u64 tb_cycle = 1;
166 u64 time;
167
168 unsigned int cpu = dev->id;
169
170 if (!pw20_wt) {
171 smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
172 value = (value & PWRMGTCR0_PW20_ENT) >>
173 PWRMGTCR0_PW20_ENT_SHIFT;
174
175 tb_cycle = (tb_cycle << (MAX_BIT - value + 1));
176 /* convert TB cycles to ns */
177 if (tb_ticks_per_usec > 1000) {
178 time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
179 } else {
180 u32 rem_us;
181
182 time = div_u64_rem(tb_cycle, tb_ticks_per_usec,
183 &rem_us);
184 time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
185 }
186 } else {
187 time = pw20_wt;
188 }
189
190 return sprintf(buf, "%llu\n", time > 0 ? time : 0);
191}
192
193static void set_pw20_wait_entry_bit(void *val)
194{
195 u32 *value = val;
196 u32 pw20_idle;
197
198 pw20_idle = mfspr(SPRN_PWRMGTCR0);
199
200 /* Set Automatic PW20 Core Idle Count */
201 /* clear count */
202 pw20_idle &= ~PWRMGTCR0_PW20_ENT;
203
204 /* set count */
205 pw20_idle |= ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT);
206
207 mtspr(SPRN_PWRMGTCR0, pw20_idle);
208}
209
210static ssize_t store_pw20_wait_time(struct device *dev,
211 struct device_attribute *attr,
212 const char *buf, size_t count)
213{
214 u32 entry_bit;
215 u64 value;
216
217 unsigned int cpu = dev->id;
218
219 if (kstrtou64(buf, 0, &value))
220 return -EINVAL;
221
222 if (!value)
223 return -EINVAL;
224
225 entry_bit = get_idle_ticks_bit(value);
226 if (entry_bit > MAX_BIT)
227 return -EINVAL;
228
229 pw20_wt = value;
230
231 smp_call_function_single(cpu, set_pw20_wait_entry_bit,
232 &entry_bit, 1);
233
234 return count;
235}
236
237static ssize_t show_altivec_idle(struct device *dev,
238 struct device_attribute *attr, char *buf)
239{
240 u32 value;
241 unsigned int cpu = dev->id;
242
243 smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
244
245 value &= PWRMGTCR0_AV_IDLE_PD_EN;
246
247 return sprintf(buf, "%u\n", value ? 1 : 0);
248}
249
250static void do_store_altivec_idle(void *val)
251{
252 u32 *value = val;
253 u32 altivec_idle;
254
255 altivec_idle = mfspr(SPRN_PWRMGTCR0);
256
257 if (*value)
258 altivec_idle |= PWRMGTCR0_AV_IDLE_PD_EN;
259 else
260 altivec_idle &= ~PWRMGTCR0_AV_IDLE_PD_EN;
261
262 mtspr(SPRN_PWRMGTCR0, altivec_idle);
263}
264
265static ssize_t store_altivec_idle(struct device *dev,
266 struct device_attribute *attr,
267 const char *buf, size_t count)
268{
269 u32 value;
270 unsigned int cpu = dev->id;
271
272 if (kstrtou32(buf, 0, &value))
273 return -EINVAL;
274
275 if (value > 1)
276 return -EINVAL;
277
278 smp_call_function_single(cpu, do_store_altivec_idle, &value, 1);
279
280 return count;
281}
282
283static ssize_t show_altivec_idle_wait_time(struct device *dev,
284 struct device_attribute *attr, char *buf)
285{
286 u32 value;
287 u64 tb_cycle = 1;
288 u64 time;
289
290 unsigned int cpu = dev->id;
291
292 if (!altivec_idle_wt) {
293 smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
294 value = (value & PWRMGTCR0_AV_IDLE_CNT) >>
295 PWRMGTCR0_AV_IDLE_CNT_SHIFT;
296
297 tb_cycle = (tb_cycle << (MAX_BIT - value + 1));
 298 /* convert timebase cycles to ns */
299 if (tb_ticks_per_usec > 1000) {
300 time = div_u64(tb_cycle, tb_ticks_per_usec / 1000);
301 } else {
302 u32 rem_us;
303
304 time = div_u64_rem(tb_cycle, tb_ticks_per_usec,
305 &rem_us);
306 time = time * 1000 + rem_us * 1000 / tb_ticks_per_usec;
307 }
308 } else {
309 time = altivec_idle_wt;
310 }
311
312 return sprintf(buf, "%llu\n", time > 0 ? time : 0);
313}
314
315static void set_altivec_idle_wait_entry_bit(void *val)
316{
317 u32 *value = val;
318 u32 altivec_idle;
319
320 altivec_idle = mfspr(SPRN_PWRMGTCR0);
321
322 /* Set Automatic AltiVec Idle Count */
323 /* clear count */
324 altivec_idle &= ~PWRMGTCR0_AV_IDLE_CNT;
325
326 /* set count */
327 altivec_idle |= ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT);
328
329 mtspr(SPRN_PWRMGTCR0, altivec_idle);
330}
331
332static ssize_t store_altivec_idle_wait_time(struct device *dev,
333 struct device_attribute *attr,
334 const char *buf, size_t count)
335{
336 u32 entry_bit;
337 u64 value;
338
339 unsigned int cpu = dev->id;
340
341 if (kstrtou64(buf, 0, &value))
342 return -EINVAL;
343
344 if (!value)
345 return -EINVAL;
346
347 entry_bit = get_idle_ticks_bit(value);
348 if (entry_bit > MAX_BIT)
349 return -EINVAL;
350
351 altivec_idle_wt = value;
352
353 smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit,
354 &entry_bit, 1);
355
356 return count;
357}
358
359/*
360 * Enable/Disable interface:
361 * 0, disable. 1, enable.
362 */
363static DEVICE_ATTR(pw20_state, 0600, show_pw20_state, store_pw20_state);
364static DEVICE_ATTR(altivec_idle, 0600, show_altivec_idle, store_altivec_idle);
365
366/*
 367 * Set wait time interface (nanoseconds)
 368 * Example: based on a TB frequency of 41 MHz.
369 * 1~48(ns): TB[63]
370 * 49~97(ns): TB[62]
371 * 98~195(ns): TB[61]
372 * 196~390(ns): TB[60]
373 * 391~780(ns): TB[59]
374 * 781~1560(ns): TB[58]
375 * ...
376 */
377static DEVICE_ATTR(pw20_wait_time, 0600,
378 show_pw20_wait_time,
379 store_pw20_wait_time);
380static DEVICE_ATTR(altivec_idle_wait_time, 0600,
381 show_altivec_idle_wait_time,
382 store_altivec_idle_wait_time);
383#endif
384
89/* 385/*
90 * Enabling PMCs will slow partition context switch times so we only do 386 * Enabling PMCs will slow partition context switch times so we only do
91 * it the first time we write to the PMCs. 387 * it the first time we write to the PMCs.
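
The new pw20_wait_time and altivec_idle_wait_time attributes translate a requested idle wait in nanoseconds into a timebase bit count: the nanosecond value is converted to timebase ticks, ilog2() of the tick count selects an entry bit, and the PWRMGTCR0 entry field is programmed as MAX_BIT minus that bit. The user-space sketch below redoes the same arithmetic as get_idle_ticks_bit(); the 41 ticks-per-microsecond figure is only an assumption matching the 41 MHz example in the comment block above, whereas the kernel reads tb_ticks_per_usec.

/* Hedged sketch: get_idle_ticks_bit() redone in user space for illustration.
 * Assumes a 41 MHz timebase (41 ticks per microsecond); not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX_BIT 63

static unsigned int idle_ticks_bit(uint64_t ns, uint64_t ticks_per_usec)
{
	uint64_t cycles;

	if (ns >= 10000)
		cycles = ((ns + 500) / 1000) * ticks_per_usec;	/* round to us first */
	else
		cycles = ns * ticks_per_usec / 1000;

	if (!cycles)
		return 0;

	return 63 - __builtin_clzll(cycles);	/* ilog2(cycles) */
}

int main(void)
{
	unsigned int bit = idle_ticks_bit(10000, 41);	/* 10000 ns -> 410 ticks -> bit 8 */

	printf("entry bit %u, PW20_ENT field value %u\n", bit, MAX_BIT - bit);
	return 0;
}
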
@@ -108,14 +404,14 @@ void ppc_enable_pmcs(void)
108} 404}
109EXPORT_SYMBOL(ppc_enable_pmcs); 405EXPORT_SYMBOL(ppc_enable_pmcs);
110 406
111#define SYSFS_PMCSETUP(NAME, ADDRESS) \ 407#define __SYSFS_SPRSETUP(NAME, ADDRESS, EXTRA) \
112static void read_##NAME(void *val) \ 408static void read_##NAME(void *val) \
113{ \ 409{ \
114 *(unsigned long *)val = mfspr(ADDRESS); \ 410 *(unsigned long *)val = mfspr(ADDRESS); \
115} \ 411} \
116static void write_##NAME(void *val) \ 412static void write_##NAME(void *val) \
117{ \ 413{ \
118 ppc_enable_pmcs(); \ 414 EXTRA; \
119 mtspr(ADDRESS, *(unsigned long *)val); \ 415 mtspr(ADDRESS, *(unsigned long *)val); \
120} \ 416} \
121static ssize_t show_##NAME(struct device *dev, \ 417static ssize_t show_##NAME(struct device *dev, \
@@ -140,6 +436,10 @@ static ssize_t __used \
140 return count; \ 436 return count; \
141} 437}
142 438
439#define SYSFS_PMCSETUP(NAME, ADDRESS) \
440 __SYSFS_SPRSETUP(NAME, ADDRESS, ppc_enable_pmcs())
441#define SYSFS_SPRSETUP(NAME, ADDRESS) \
442 __SYSFS_SPRSETUP(NAME, ADDRESS, )
143 443
144/* Let's define all possible registers, we'll only hook up the ones 444/* Let's define all possible registers, we'll only hook up the ones
145 * that are implemented on the current processor 445 * that are implemented on the current processor
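
Splitting SYSFS_PMCSETUP into a shared __SYSFS_SPRSETUP with an EXTRA argument keeps the ppc_enable_pmcs() side effect for the performance counter registers while giving plain SPRs (PURR, SPURR, DSCR, PIR and the debug-only HID/PA6T registers below) read/write accessors that do not force PMCs on. Roughly what the preprocessor produces for one SPRSETUP user, with the sysfs show/store wrappers elided:

/* Approximate expansion of SYSFS_SPRSETUP(purr, SPRN_PURR): EXTRA is empty,
 * so the write path touches the SPR directly; the PMC variant would call
 * ppc_enable_pmcs() first.
 */
static void read_purr(void *val)
{
	*(unsigned long *)val = mfspr(SPRN_PURR);
}
static void write_purr(void *val)
{
	;	/* empty EXTRA statement */
	mtspr(SPRN_PURR, *(unsigned long *)val);
}
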
@@ -175,10 +475,10 @@ SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
175SYSFS_PMCSETUP(pmc8, SPRN_PMC8); 475SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
176 476
177SYSFS_PMCSETUP(mmcra, SPRN_MMCRA); 477SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
178SYSFS_PMCSETUP(purr, SPRN_PURR); 478SYSFS_SPRSETUP(purr, SPRN_PURR);
179SYSFS_PMCSETUP(spurr, SPRN_SPURR); 479SYSFS_SPRSETUP(spurr, SPRN_SPURR);
180SYSFS_PMCSETUP(dscr, SPRN_DSCR); 480SYSFS_SPRSETUP(dscr, SPRN_DSCR);
181SYSFS_PMCSETUP(pir, SPRN_PIR); 481SYSFS_SPRSETUP(pir, SPRN_PIR);
182 482
183/* 483/*
184 Lets only enable read for phyp resources and 484 Lets only enable read for phyp resources and
@@ -249,34 +549,34 @@ SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
249SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4); 549SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
250SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5); 550SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
251#ifdef CONFIG_DEBUG_KERNEL 551#ifdef CONFIG_DEBUG_KERNEL
252SYSFS_PMCSETUP(hid0, SPRN_HID0); 552SYSFS_SPRSETUP(hid0, SPRN_HID0);
253SYSFS_PMCSETUP(hid1, SPRN_HID1); 553SYSFS_SPRSETUP(hid1, SPRN_HID1);
254SYSFS_PMCSETUP(hid4, SPRN_HID4); 554SYSFS_SPRSETUP(hid4, SPRN_HID4);
255SYSFS_PMCSETUP(hid5, SPRN_HID5); 555SYSFS_SPRSETUP(hid5, SPRN_HID5);
256SYSFS_PMCSETUP(ima0, SPRN_PA6T_IMA0); 556SYSFS_SPRSETUP(ima0, SPRN_PA6T_IMA0);
257SYSFS_PMCSETUP(ima1, SPRN_PA6T_IMA1); 557SYSFS_SPRSETUP(ima1, SPRN_PA6T_IMA1);
258SYSFS_PMCSETUP(ima2, SPRN_PA6T_IMA2); 558SYSFS_SPRSETUP(ima2, SPRN_PA6T_IMA2);
259SYSFS_PMCSETUP(ima3, SPRN_PA6T_IMA3); 559SYSFS_SPRSETUP(ima3, SPRN_PA6T_IMA3);
260SYSFS_PMCSETUP(ima4, SPRN_PA6T_IMA4); 560SYSFS_SPRSETUP(ima4, SPRN_PA6T_IMA4);
261SYSFS_PMCSETUP(ima5, SPRN_PA6T_IMA5); 561SYSFS_SPRSETUP(ima5, SPRN_PA6T_IMA5);
262SYSFS_PMCSETUP(ima6, SPRN_PA6T_IMA6); 562SYSFS_SPRSETUP(ima6, SPRN_PA6T_IMA6);
263SYSFS_PMCSETUP(ima7, SPRN_PA6T_IMA7); 563SYSFS_SPRSETUP(ima7, SPRN_PA6T_IMA7);
264SYSFS_PMCSETUP(ima8, SPRN_PA6T_IMA8); 564SYSFS_SPRSETUP(ima8, SPRN_PA6T_IMA8);
265SYSFS_PMCSETUP(ima9, SPRN_PA6T_IMA9); 565SYSFS_SPRSETUP(ima9, SPRN_PA6T_IMA9);
266SYSFS_PMCSETUP(imaat, SPRN_PA6T_IMAAT); 566SYSFS_SPRSETUP(imaat, SPRN_PA6T_IMAAT);
267SYSFS_PMCSETUP(btcr, SPRN_PA6T_BTCR); 567SYSFS_SPRSETUP(btcr, SPRN_PA6T_BTCR);
268SYSFS_PMCSETUP(pccr, SPRN_PA6T_PCCR); 568SYSFS_SPRSETUP(pccr, SPRN_PA6T_PCCR);
269SYSFS_PMCSETUP(rpccr, SPRN_PA6T_RPCCR); 569SYSFS_SPRSETUP(rpccr, SPRN_PA6T_RPCCR);
270SYSFS_PMCSETUP(der, SPRN_PA6T_DER); 570SYSFS_SPRSETUP(der, SPRN_PA6T_DER);
271SYSFS_PMCSETUP(mer, SPRN_PA6T_MER); 571SYSFS_SPRSETUP(mer, SPRN_PA6T_MER);
272SYSFS_PMCSETUP(ber, SPRN_PA6T_BER); 572SYSFS_SPRSETUP(ber, SPRN_PA6T_BER);
273SYSFS_PMCSETUP(ier, SPRN_PA6T_IER); 573SYSFS_SPRSETUP(ier, SPRN_PA6T_IER);
274SYSFS_PMCSETUP(sier, SPRN_PA6T_SIER); 574SYSFS_SPRSETUP(sier, SPRN_PA6T_SIER);
275SYSFS_PMCSETUP(siar, SPRN_PA6T_SIAR); 575SYSFS_SPRSETUP(siar, SPRN_PA6T_SIAR);
276SYSFS_PMCSETUP(tsr0, SPRN_PA6T_TSR0); 576SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
277SYSFS_PMCSETUP(tsr1, SPRN_PA6T_TSR1); 577SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
278SYSFS_PMCSETUP(tsr2, SPRN_PA6T_TSR2); 578SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
279SYSFS_PMCSETUP(tsr3, SPRN_PA6T_TSR3); 579SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
280#endif /* CONFIG_DEBUG_KERNEL */ 580#endif /* CONFIG_DEBUG_KERNEL */
281#endif /* HAS_PPC_PMC_PA6T */ 581#endif /* HAS_PPC_PMC_PA6T */
282 582
@@ -421,6 +721,15 @@ static void register_cpu_online(unsigned int cpu)
421 device_create_file(s, &dev_attr_pir); 721 device_create_file(s, &dev_attr_pir);
422#endif /* CONFIG_PPC64 */ 722#endif /* CONFIG_PPC64 */
423 723
724#ifdef CONFIG_PPC_FSL_BOOK3E
725 if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
726 device_create_file(s, &dev_attr_pw20_state);
727 device_create_file(s, &dev_attr_pw20_wait_time);
728
729 device_create_file(s, &dev_attr_altivec_idle);
730 device_create_file(s, &dev_attr_altivec_idle_wait_time);
731 }
732#endif
424 cacheinfo_cpu_online(cpu); 733 cacheinfo_cpu_online(cpu);
425} 734}
426 735
@@ -493,6 +802,15 @@ static void unregister_cpu_online(unsigned int cpu)
493 device_remove_file(s, &dev_attr_pir); 802 device_remove_file(s, &dev_attr_pir);
494#endif /* CONFIG_PPC64 */ 803#endif /* CONFIG_PPC64 */
495 804
805#ifdef CONFIG_PPC_FSL_BOOK3E
806 if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
807 device_remove_file(s, &dev_attr_pw20_state);
808 device_remove_file(s, &dev_attr_pw20_wait_time);
809
810 device_remove_file(s, &dev_attr_altivec_idle);
811 device_remove_file(s, &dev_attr_altivec_idle_wait_time);
812 }
813#endif
496 cacheinfo_cpu_offline(cpu); 814 cacheinfo_cpu_offline(cpu);
497} 815}
498 816
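
Both the create and remove paths gate the four new files on PVR_VER_E6500, since PW20 and AltiVec idle power-down only exist on e6500 cores; registering them unconditionally would expose knobs that silently do nothing elsewhere. A small user-space usage sketch, assuming the attributes appear at the usual per-CPU sysfs location:

/* Hedged usage sketch: enable PW20 on CPU 0.  The path is an assumption
 * based on where per-CPU device attributes normally land; on anything
 * other than an e6500 the open() simply fails.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/devices/system/cpu/cpu0/pw20_state";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}
	if (write(fd, "1", 1) != 1)
		perror("write");
	close(fd);
	return 0;
}
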
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b3b144121cc9..b3dab20acf34 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -510,7 +510,6 @@ void timer_interrupt(struct pt_regs * regs)
510 */ 510 */
511 may_hard_irq_enable(); 511 may_hard_irq_enable();
512 512
513 __get_cpu_var(irq_stat).timer_irqs++;
514 513
515#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC) 514#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
516 if (atomic_read(&ppc_n_lost_interrupts) != 0) 515 if (atomic_read(&ppc_n_lost_interrupts) != 0)
@@ -532,10 +531,15 @@ void timer_interrupt(struct pt_regs * regs)
532 *next_tb = ~(u64)0; 531 *next_tb = ~(u64)0;
533 if (evt->event_handler) 532 if (evt->event_handler)
534 evt->event_handler(evt); 533 evt->event_handler(evt);
534 __get_cpu_var(irq_stat).timer_irqs_event++;
535 } else { 535 } else {
536 now = *next_tb - now; 536 now = *next_tb - now;
537 if (now <= DECREMENTER_MAX) 537 if (now <= DECREMENTER_MAX)
538 set_dec((int)now); 538 set_dec((int)now);
539 /* We may have raced with new irq work */
540 if (test_irq_work_pending())
541 set_dec(1);
542 __get_cpu_var(irq_stat).timer_irqs_others++;
539 } 543 }
540 544
541#ifdef CONFIG_PPC64 545#ifdef CONFIG_PPC64
@@ -801,8 +805,16 @@ static void __init clocksource_init(void)
801static int decrementer_set_next_event(unsigned long evt, 805static int decrementer_set_next_event(unsigned long evt,
802 struct clock_event_device *dev) 806 struct clock_event_device *dev)
803{ 807{
808 /* Don't adjust the decrementer if some irq work is pending */
809 if (test_irq_work_pending())
810 return 0;
804 __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt; 811 __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
805 set_dec(evt); 812 set_dec(evt);
813
814 /* We may have raced with new irq work */
815 if (test_irq_work_pending())
816 set_dec(1);
817
806 return 0; 818 return 0;
807} 819}
808 820
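
The time.c changes split the timer interrupt statistic into timer_irqs_event and timer_irqs_others and close a race with irq_work: irq work is signalled by forcing the decrementer to expire, so if new work is queued just before the decrementer is reloaded with a large value, that work could sit unprocessed until the next timer event. Both the interrupt path and decrementer_set_next_event() therefore re-check after programming the decrementer and force a near-immediate expiry when work slipped in. A trimmed sketch of the pattern (the decrementers_next_tb bookkeeping of the real function is omitted):

/* Hedged sketch of the double-check pattern from decrementer_set_next_event();
 * set_dec() and test_irq_work_pending() are the kernel primitives named in
 * the diff.
 */
static int set_next_event_sketch(unsigned long evt)
{
	/* irq work already wants an interrupt: don't push it further out */
	if (test_irq_work_pending())
		return 0;

	set_dec(evt);			/* program the next timer event */

	/* work queued between the check above and the write to the DEC */
	if (test_irq_work_pending())
		set_dec(1);		/* expire (almost) immediately instead */

	return 0;
}
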
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 907a472f9a9e..33cd7a0b8e73 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -285,6 +285,21 @@ void system_reset_exception(struct pt_regs *regs)
285 285
286 /* What should we do here? We could issue a shutdown or hard reset. */ 286 /* What should we do here? We could issue a shutdown or hard reset. */
287} 287}
288
289/*
290 * This function is called in real mode. Strictly no printk's please.
291 *
 292 * regs->nip and regs->msr contain SRR0 and SRR1.
293 */
294long machine_check_early(struct pt_regs *regs)
295{
296 long handled = 0;
297
298 if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
299 handled = cur_cpu_spec->machine_check_early(regs);
300 return handled;
301}
302
288#endif 303#endif
289 304
290/* 305/*
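
machine_check_early() gives the 64-bit server machine check path a hook that runs in real mode, before it is safe to printk or take faults, and simply delegates to a per-CPU-family handler hung off cur_cpu_spec; the return value reports whether the error was handled. A hedged sketch of the hook this implies, with the handler name purely illustrative (the real implementations would come from the new mce_power.c in the diffstat):

/* Sketch of the assumed cputable hook; only the callback shape is taken
 * from the call above, everything else here is reconstruction.
 */
struct pt_regs;

struct cpu_spec_fragment {
	/* ...existing cpu_spec fields elided... */
	long (*machine_check_early)(struct pt_regs *regs);	/* real mode, no printk */
};

/* A POWER8 table entry would then carry something like:
 *	.machine_check_early = p8_realmode_machine_check,   (illustrative name)
 */
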
@@ -1384,7 +1399,6 @@ void fp_unavailable_tm(struct pt_regs *regs)
1384 1399
1385 TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n", 1400 TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
1386 regs->nip, regs->msr); 1401 regs->nip, regs->msr);
1387 tm_enable();
1388 1402
1389 /* We can only have got here if the task started using FP after 1403 /* We can only have got here if the task started using FP after
1390 * beginning the transaction. So, the transactional regs are just a 1404 * beginning the transaction. So, the transactional regs are just a
@@ -1393,8 +1407,7 @@ void fp_unavailable_tm(struct pt_regs *regs)
1393 * transaction, and probably retry but now with FP enabled. So the 1407 * transaction, and probably retry but now with FP enabled. So the
1394 * checkpointed FP registers need to be loaded. 1408 * checkpointed FP registers need to be loaded.
1395 */ 1409 */
1396 tm_reclaim(&current->thread, current->thread.regs->msr, 1410 tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1397 TM_CAUSE_FAC_UNAV);
1398 /* Reclaim didn't save out any FPRs to transact_fprs. */ 1411 /* Reclaim didn't save out any FPRs to transact_fprs. */
1399 1412
1400 /* Enable FP for the task: */ 1413 /* Enable FP for the task: */
@@ -1403,11 +1416,19 @@ void fp_unavailable_tm(struct pt_regs *regs)
1403 /* This loads and recheckpoints the FP registers from 1416 /* This loads and recheckpoints the FP registers from
1404 * thread.fpr[]. They will remain in registers after the 1417 * thread.fpr[]. They will remain in registers after the
1405 * checkpoint so we don't need to reload them after. 1418 * checkpoint so we don't need to reload them after.
1419 * If VMX is in use, the VRs now hold checkpointed values,
1420 * so we don't want to load the VRs from the thread_struct.
1406 */ 1421 */
1407 tm_recheckpoint(&current->thread, regs->msr); 1422 tm_recheckpoint(&current->thread, MSR_FP);
1423
1424 /* If VMX is in use, get the transactional values back */
1425 if (regs->msr & MSR_VEC) {
1426 do_load_up_transact_altivec(&current->thread);
1427 /* At this point all the VSX state is loaded, so enable it */
1428 regs->msr |= MSR_VSX;
1429 }
1408} 1430}
1409 1431
1410#ifdef CONFIG_ALTIVEC
1411void altivec_unavailable_tm(struct pt_regs *regs) 1432void altivec_unavailable_tm(struct pt_regs *regs)
1412{ 1433{
1413 /* See the comments in fp_unavailable_tm(). This function operates 1434 /* See the comments in fp_unavailable_tm(). This function operates
@@ -1417,18 +1438,21 @@ void altivec_unavailable_tm(struct pt_regs *regs)
1417 TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx," 1438 TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx,"
1418 "MSR=%lx\n", 1439 "MSR=%lx\n",
1419 regs->nip, regs->msr); 1440 regs->nip, regs->msr);
1420 tm_enable(); 1441 tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1421 tm_reclaim(&current->thread, current->thread.regs->msr,
1422 TM_CAUSE_FAC_UNAV);
1423 regs->msr |= MSR_VEC; 1442 regs->msr |= MSR_VEC;
1424 tm_recheckpoint(&current->thread, regs->msr); 1443 tm_recheckpoint(&current->thread, MSR_VEC);
1425 current->thread.used_vr = 1; 1444 current->thread.used_vr = 1;
1445
1446 if (regs->msr & MSR_FP) {
1447 do_load_up_transact_fpu(&current->thread);
1448 regs->msr |= MSR_VSX;
1449 }
1426} 1450}
1427#endif
1428 1451
1429#ifdef CONFIG_VSX
1430void vsx_unavailable_tm(struct pt_regs *regs) 1452void vsx_unavailable_tm(struct pt_regs *regs)
1431{ 1453{
1454 unsigned long orig_msr = regs->msr;
1455
1432 /* See the comments in fp_unavailable_tm(). This works similarly, 1456 /* See the comments in fp_unavailable_tm(). This works similarly,
1433 * though we're loading both FP and VEC registers in here. 1457 * though we're loading both FP and VEC registers in here.
1434 * 1458 *
@@ -1440,18 +1464,30 @@ void vsx_unavailable_tm(struct pt_regs *regs)
1440 "MSR=%lx\n", 1464 "MSR=%lx\n",
1441 regs->nip, regs->msr); 1465 regs->nip, regs->msr);
1442 1466
1443 tm_enable(); 1467 current->thread.used_vsr = 1;
1468
1469 /* If FP and VMX are already loaded, we have all the state we need */
1470 if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) {
1471 regs->msr |= MSR_VSX;
1472 return;
1473 }
1474
1444 /* This reclaims FP and/or VR regs if they're already enabled */ 1475 /* This reclaims FP and/or VR regs if they're already enabled */
1445 tm_reclaim(&current->thread, current->thread.regs->msr, 1476 tm_reclaim_current(TM_CAUSE_FAC_UNAV);
1446 TM_CAUSE_FAC_UNAV);
1447 1477
1448 regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode | 1478 regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |
1449 MSR_VSX; 1479 MSR_VSX;
1450 /* This loads & recheckpoints FP and VRs. */ 1480
1451 tm_recheckpoint(&current->thread, regs->msr); 1481 /* This loads & recheckpoints FP and VRs; but we have
1452 current->thread.used_vsr = 1; 1482 * to be sure not to overwrite previously-valid state.
1483 */
1484 tm_recheckpoint(&current->thread, regs->msr & ~orig_msr);
1485
1486 if (orig_msr & MSR_FP)
1487 do_load_up_transact_fpu(&current->thread);
1488 if (orig_msr & MSR_VEC)
1489 do_load_up_transact_altivec(&current->thread);
1453} 1490}
1454#endif
1455#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 1491#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
1456 1492
1457void performance_monitor_exception(struct pt_regs *regs) 1493void performance_monitor_exception(struct pt_regs *regs)
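
The transactional-memory handlers used to reclaim with the live MSR and then recheckpoint with regs->msr, which could clobber checkpointed FP or VMX state that was still valid. They now use the tm_reclaim_current() wrapper, recheckpoint only the facility that actually faulted (MSR_FP or MSR_VEC), and reload the other facility's transactional values explicitly when the transaction was using it; vsx_unavailable_tm() additionally returns early when both FP and VMX were already loaded. A compressed sketch of the FP path, using only helpers named in the diff and dropping the TM_DEBUG output:

/* Hedged sketch of the new fp_unavailable_tm() flow; error handling and
 * the fpexc_mode details of the real function are omitted.
 */
void fp_unavailable_tm_sketch(struct pt_regs *regs)
{
	/* stash transactional state; FP was off, so no FPRs were live */
	tm_reclaim_current(TM_CAUSE_FAC_UNAV);

	regs->msr |= MSR_FP;	/* enable FP for the task */

	/* recheckpoint FP only, leaving checkpointed VRs untouched */
	tm_recheckpoint(&current->thread, MSR_FP);

	/* VMX was live in the transaction: bring back its transactional copy */
	if (regs->msr & MSR_VEC) {
		do_load_up_transact_altivec(&current->thread);
		regs->msr |= MSR_VSX;
	}
}
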
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 59f419b935f2..003b20964ea0 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -186,7 +186,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
186 * emulate_step() returns 1 if the insn was successfully emulated. 186 * emulate_step() returns 1 if the insn was successfully emulated.
187 * For all other cases, we need to single-step in hardware. 187 * For all other cases, we need to single-step in hardware.
188 */ 188 */
189 ret = emulate_step(regs, auprobe->ainsn); 189 ret = emulate_step(regs, auprobe->insn);
190 if (ret > 0) 190 if (ret > 0)
191 return true; 191 return true;
192 192
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
index 6e8f507ed32b..6ac107ac402a 100644
--- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
@@ -1,4 +1,3 @@
1#include <linux/init.h>
2#include <linux/linkage.h> 1#include <linux/linkage.h>
3#include <asm/page.h> 2#include <asm/page.h>
4 3
@@ -7,7 +6,7 @@
7 .globl vdso32_start, vdso32_end 6 .globl vdso32_start, vdso32_end
8 .balign PAGE_SIZE 7 .balign PAGE_SIZE
9vdso32_start: 8vdso32_start:
10 .incbin "arch/powerpc/kernel/vdso32/vdso32.so" 9 .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg"
11 .balign PAGE_SIZE 10 .balign PAGE_SIZE
12vdso32_end: 11vdso32_end:
13 12
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
index b8553d62b792..df60fca6a13d 100644
--- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
@@ -1,4 +1,3 @@
1#include <linux/init.h>
2#include <linux/linkage.h> 1#include <linux/linkage.h>
3#include <asm/page.h> 2#include <asm/page.h>
4 3
@@ -7,7 +6,7 @@
7 .globl vdso64_start, vdso64_end 6 .globl vdso64_start, vdso64_end
8 .balign PAGE_SIZE 7 .balign PAGE_SIZE
9vdso64_start: 8vdso64_start:
10 .incbin "arch/powerpc/kernel/vdso64/vdso64.so" 9 .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg"
11 .balign PAGE_SIZE 10 .balign PAGE_SIZE
12vdso64_end: 11vdso64_end:
13 12
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 0458a9aaba9d..74f8050518d6 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -37,6 +37,16 @@ _GLOBAL(do_load_up_transact_altivec)
37#endif 37#endif
38 38
39/* 39/*
40 * Enable use of VMX/Altivec for the caller.
41 */
42_GLOBAL(vec_enable)
43 mfmsr r3
44 oris r3,r3,MSR_VEC@h
45 MTMSRD(r3)
46 isync
47 blr
48
49/*
40 * Load state from memory into VMX registers including VSCR. 50 * Load state from memory into VMX registers including VSCR.
41 * Assumes the caller has enabled VMX in the MSR. 51 * Assumes the caller has enabled VMX in the MSR.
42 */ 52 */
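
The new vec_enable() helper sets MSR_VEC for the caller (with an isync) so that C code can turn VMX on before invoking the state-loading routines that, per the comment above, assume the facility is already enabled. A hedged pairing sketch; the load routine name and the vr_state layout are assumptions based on the surrounding series, not something this hunk shows:

/* Illustrative pairing only. */
extern void vec_enable(void);				/* MSR[VEC] = 1, then isync */
extern void load_vr_state(struct thread_vr_state *v);	/* assumed name */

static void load_vrs_sketch(struct thread_struct *t)
{
	vec_enable();			/* VMX must be on before the load */
	load_vr_state(&t->vr_state);	/* VR0-VR31 plus VSCR */
}
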
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 76a64821f4a2..826d8bd9e522 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -518,16 +518,18 @@ static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
518 struct dma_attrs *attrs) 518 struct dma_attrs *attrs)
519{ 519{
520 struct vio_dev *viodev = to_vio_dev(dev); 520 struct vio_dev *viodev = to_vio_dev(dev);
521 struct iommu_table *tbl;
521 dma_addr_t ret = DMA_ERROR_CODE; 522 dma_addr_t ret = DMA_ERROR_CODE;
522 523
523 if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) { 524 tbl = get_iommu_table_base(dev);
525 if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
524 atomic_inc(&viodev->cmo.allocs_failed); 526 atomic_inc(&viodev->cmo.allocs_failed);
525 return ret; 527 return ret;
526 } 528 }
527 529
528 ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs); 530 ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
529 if (unlikely(dma_mapping_error(dev, ret))) { 531 if (unlikely(dma_mapping_error(dev, ret))) {
530 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); 532 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
531 atomic_inc(&viodev->cmo.allocs_failed); 533 atomic_inc(&viodev->cmo.allocs_failed);
532 } 534 }
533 535
@@ -540,10 +542,12 @@ static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
540 struct dma_attrs *attrs) 542 struct dma_attrs *attrs)
541{ 543{
542 struct vio_dev *viodev = to_vio_dev(dev); 544 struct vio_dev *viodev = to_vio_dev(dev);
545 struct iommu_table *tbl;
543 546
547 tbl = get_iommu_table_base(dev);
544 dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs); 548 dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
545 549
546 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE)); 550 vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
547} 551}
548 552
549static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist, 553static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
@@ -551,12 +555,14 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
551 struct dma_attrs *attrs) 555 struct dma_attrs *attrs)
552{ 556{
553 struct vio_dev *viodev = to_vio_dev(dev); 557 struct vio_dev *viodev = to_vio_dev(dev);
558 struct iommu_table *tbl;
554 struct scatterlist *sgl; 559 struct scatterlist *sgl;
555 int ret, count = 0; 560 int ret, count = 0;
556 size_t alloc_size = 0; 561 size_t alloc_size = 0;
557 562
563 tbl = get_iommu_table_base(dev);
558 for (sgl = sglist; count < nelems; count++, sgl++) 564 for (sgl = sglist; count < nelems; count++, sgl++)
559 alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE); 565 alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
560 566
561 if (vio_cmo_alloc(viodev, alloc_size)) { 567 if (vio_cmo_alloc(viodev, alloc_size)) {
562 atomic_inc(&viodev->cmo.allocs_failed); 568 atomic_inc(&viodev->cmo.allocs_failed);
@@ -572,7 +578,7 @@ static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
572 } 578 }
573 579
574 for (sgl = sglist, count = 0; count < ret; count++, sgl++) 580 for (sgl = sglist, count = 0; count < ret; count++, sgl++)
575 alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE); 581 alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
576 if (alloc_size) 582 if (alloc_size)
577 vio_cmo_dealloc(viodev, alloc_size); 583 vio_cmo_dealloc(viodev, alloc_size);
578 584
@@ -585,12 +591,14 @@ static void vio_dma_iommu_unmap_sg(struct device *dev,
585 struct dma_attrs *attrs) 591 struct dma_attrs *attrs)
586{ 592{
587 struct vio_dev *viodev = to_vio_dev(dev); 593 struct vio_dev *viodev = to_vio_dev(dev);
594 struct iommu_table *tbl;
588 struct scatterlist *sgl; 595 struct scatterlist *sgl;
589 size_t alloc_size = 0; 596 size_t alloc_size = 0;
590 int count = 0; 597 int count = 0;
591 598
599 tbl = get_iommu_table_base(dev);
592 for (sgl = sglist; count < nelems; count++, sgl++) 600 for (sgl = sglist; count < nelems; count++, sgl++)
593 alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE); 601 alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
594 602
595 dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs); 603 dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
596 604
@@ -706,11 +714,14 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
706{ 714{
707 struct vio_cmo_dev_entry *dev_ent; 715 struct vio_cmo_dev_entry *dev_ent;
708 struct device *dev = &viodev->dev; 716 struct device *dev = &viodev->dev;
717 struct iommu_table *tbl;
709 struct vio_driver *viodrv = to_vio_driver(dev->driver); 718 struct vio_driver *viodrv = to_vio_driver(dev->driver);
710 unsigned long flags; 719 unsigned long flags;
711 size_t size; 720 size_t size;
712 bool dma_capable = false; 721 bool dma_capable = false;
713 722
723 tbl = get_iommu_table_base(dev);
724
714 /* A device requires entitlement if it has a DMA window property */ 725 /* A device requires entitlement if it has a DMA window property */
715 switch (viodev->family) { 726 switch (viodev->family) {
716 case VDEVICE: 727 case VDEVICE:
@@ -736,7 +747,8 @@ static int vio_cmo_bus_probe(struct vio_dev *viodev)
736 return -EINVAL; 747 return -EINVAL;
737 } 748 }
738 749
739 viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev)); 750 viodev->cmo.desired =
751 IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
740 if (viodev->cmo.desired < VIO_CMO_MIN_ENT) 752 if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
741 viodev->cmo.desired = VIO_CMO_MIN_ENT; 753 viodev->cmo.desired = VIO_CMO_MIN_ENT;
742 size = VIO_CMO_MIN_ENT; 754 size = VIO_CMO_MIN_ENT;
@@ -1176,9 +1188,10 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
1176 &tbl->it_index, &offset, &size); 1188 &tbl->it_index, &offset, &size);
1177 1189
1178 /* TCE table size - measured in tce entries */ 1190 /* TCE table size - measured in tce entries */
1179 tbl->it_size = size >> IOMMU_PAGE_SHIFT; 1191 tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
1192 tbl->it_size = size >> tbl->it_page_shift;
1180 /* offset for VIO should always be 0 */ 1193 /* offset for VIO should always be 0 */
1181 tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; 1194 tbl->it_offset = offset >> tbl->it_page_shift;
1182 tbl->it_busno = 0; 1195 tbl->it_busno = 0;
1183 tbl->it_type = TCE_VB; 1196 tbl->it_type = TCE_VB;
1184 tbl->it_blocksize = 16; 1197 tbl->it_blocksize = 16;
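
Every vio.c hunk follows from the IOMMU page size becoming a per-table property: iommu_table now carries it_page_shift, so the CMO entitlement accounting must round mappings to the page size of the table the device actually uses, IOMMU_PAGE_SIZE() and IOMMU_PAGE_ALIGN() take the table as an argument, and vio_build_iommu_table() records the 4K shift explicitly rather than relying on a global constant. A hedged reconstruction of what the helpers are assumed to look like after this series; the real definitions live in asm/iommu.h and may differ in detail:

/* Reconstructed per-table page-size helpers (approximation, not verbatim). */
#define IOMMU_PAGE_SHIFT_4K	12
#define IOMMU_PAGE_SIZE_4K	(1UL << IOMMU_PAGE_SHIFT_4K)

#define IOMMU_PAGE_SIZE(tbl)	(1UL << (tbl)->it_page_shift)
#define IOMMU_PAGE_MASK(tbl)	(~(IOMMU_PAGE_SIZE(tbl) - 1))
#define IOMMU_PAGE_ALIGN(addr, tbl) \
	(((addr) + IOMMU_PAGE_SIZE(tbl) - 1) & IOMMU_PAGE_MASK(tbl))
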